Skip to content

Instantly share code, notes, and snippets.

@MattSandy
Created June 2, 2016 18:46
Show Gist options
  • Save MattSandy/e24a0af50884de8ce68e66158fef5fe8 to your computer and use it in GitHub Desktop.
Save MattSandy/e24a0af50884de8ce68e66158fef5fe8 to your computer and use it in GitHub Desktop.
Reddit Scraper
var http = require('http');
var https = require('https');
var fs = require('fs');
// In-memory de-duplication state used by the scraper functions in this file:
// post_array holds the IDs of posts already written to posts.csv,
// user_array holds the authors already handed to scrape_user().
var post_array = [];
var user_array = [];
// (Re)create both CSV files with their header rows.
// This is done synchronously so the headers are guaranteed to be on disk before
// any asynchronous fs.appendFile() call can run; an asynchronous writeFile could
// complete after rows were appended and truncate them. A failure to create the
// output files throws here instead of being silently reported as 'done'.
fs.writeFileSync('posts.csv', 'Author,ID,Post Date,Comments,Score,Stickied,Pull,Subreddit\n');
console.log('done');
fs.writeFileSync('users.csv', 'Author,Author Date\n');
console.log('done');
// Names of the subreddits to scrape (the part that follows "/r/" in the URL).
// Each entry is passed to scrape_hot() by the loop that follows this list.
var subreddits = ["AskReddit", "politics", "The_Donald", "funny", "nba", "Overwatch", "gaming", "pics", "news", "todayilearned", "videos", "movies", "worldnews", "soccer", "relationships", "WTF", "SandersForPresident", "AdviceAnimals", "gifs", "leagueoflegends", "aww", "BlackPeopleTwitter", "hockey", "SquaredCircle", "gameofthrones", "nfl", "GlobalOffensive", "h3h3productions", "arrow", "DotA2", "pcmasterrace", "hiphopheads", "me_irl", "Showerthoughts", "science", "Mr_Trump", "Games", "mildlyinteresting", "asoiaf", "technology", "hillaryclinton", "IAmA", "4chan", "hearthstone", "TumblrInAction", "magicTCG", "Jokes", "PoliticalDiscussion", "tifu", "baseball", "CFB", "enoughsandersspam", "KotakuInAction", "CringeAnarchy", "anime", "wow", "NoMansSkyTheGame", "MMA", "trees", "Android", "TrollXChromosomes", "FlashTV", "interestingasfuck", "creepy", "dataisbeautiful", "explainlikeimfive", "television", "nottheonion", "cringepics", "Music", "darksouls3", "Fitness", "2007scape", "bestof", "OldSchoolCool", "reddevils", "SubredditDrama", "legaladvice", "AskMen", "canada", "Futurology", "unitedkingdom", "LifeProTips", "TwoXChromosomes", "stevenuniverse", "formula1", "atheism", "europe", "AskWomen", "photoshopbattles", "programming", "reactiongifs", "youtubehaiku", "pokemon", "rupaulsdragrace", "woahdude", "DestinyTheGame", "smashbros", "civ", "personalfinance", "HillaryForPrison", "Tinder", "comicbooks", "Marvel", "Smite", "fatlogic", "Celebs", "PS4", "Unexpected", "xboxone", "fo4", "thedivision", "food", "JUSTNOMIL", "childfree", "comics", "Fallout", "motorcycles", "CrappyDesign", "marvelstudios", "StarWars", "announcements", "MakeupAddiction", "DIY", "bodybuilding", "askscience", "EarthPorn", "books", "sports", "space", "InternetIsBeautiful", "gadgets", "history", "GetMotivated", "Documentaries", "listentothis", "philosophy", "UpliftingNews", "Art", "nosleep", "WritingPrompts", "sex", "malefashionadvice", "fffffffuuuuuuuuuuuu", "Frugal", "YouShouldKnow", "HistoryPorn", 
"AskHistorians", "lifehacks", "oddlysatisfying", "tattoos", "Minecraft", "JusticePorn", "FoodPorn", "OutOfTheLoop", "facepalm", "cringe", "wheredidthesodago", "wallpapers", "freebies", "gentlemanboners", "GameDeals", "buildapc", "conspiracy", "Cooking", "TrueReddit", "cats", "humor", "shittyaskscience", "loseit", "apple", "talesfromtechsupport", "baconreader", "skyrim", "NetflixBestOf", "spaceporn", "geek", "AbandonedPorn", "offbeat", "EatCheapAndHealthy", "RoomPorn", "h3h3_productions", "MattHossZone", "1200isjerky", "ConfusingGravity", "TheoryOfMaM", "TickTockManitowoc", "PlantProgress2016", "Thaltar", "Competitiveoverwatch", "OverwatchUniversity", "LibertarianPartyUSA", "theavalanches", "beautytalkph", "FactoryIdleGame", "Diepio", "OverwatchLFT", "badscience", "McJuggerNuggets", "ShitpostingforBernie", "PublicHealthWatch", "OliveMUA", "TheDao", "keming", "Overwatchmemes", "WredditCountryClub", "survivorrankdownIII", "madlads", "Flume", "UnexpectedMouthCatch", "GodhoodWB", "snapmap", "IndiaSpeaks", "Falloutdadjokes", "Truecels", "Preacher", "StallmanWasRight", "GhostRecon", "Tradelands", "CadenMoranDairy", "TheDangerousFaggot", "thrice", "CWArrow", "TalesFromEMS", "WerthamInAction", "BreeEssrig", "COMPLETEANARCHY", "EndlessFrontier", "nothingeverhappens", "TODispensaries", "mspaintbattles", "EnoughTrumpSpam", "hmmm", "wikiwhat", "YeezyOrDie", "edefreiheit", "ikeahacks", "Cloud9", "rarepuppers", "UnconventionalMakeup", "Battlecars", "Earth199999", "RSChronicle", "ethtrader", "wildlifephotography", "Lisk", "fixingmovies", "NamFlashbacks", "ScienceFacts", "DebbyRyan", "TaliyahMains", "lizgillies", "Midair", "unknownvideos", "TheStrokes", "assignedmale", "ReallyBigShow", "perlediritaly", "whiteknighting", "sexover30", "EmpireDidNothingWrong", "unchartedmultiplayer", "flippyshit", "tarantinogifs", "2meirl4meirl", "FFBraveExvius", "OldSchoolCelebs", "megane", "epicnamebro", "nvidia", "Idubbbz", "ThePathHulu", "ElectricSkateboarding", "9M9H9E9", "Granblue_en", 
"pumparum", "BikiniBottomTwitter", "G59", "SuddenlyGay", "freefolk", "mauramurray", "rcigarsbattlezone", "Pauper", "elderscrollslegends", "Vue", "babyrhinogifs", "MaliciousCompliance", "rubberducks", "indianpeoplefacebook", "PeakyBlinders", "MtvChallenge", "Kossacks_for_Sanders", "ToastCrumbs", "hoi4", "AroundTheNFL", "TheBarons", "AccidentalRenaissance", "FemraMeta", "kfanservice", "smoobypost", "Stronglifts5x5", "Saber", "Doom", "trashy", "Stellaris", "thatHappened", "MLS", "tf2", "OnePiece", "meirl", "totalwar", "justneckbeardthings", "pathofexile", "cars", "RocketLeague", "HighQualityGifs", "de", "Vive", "pcgaming", "australia", "oculus", "Eve", "Gunners", "forwardsfromgrandma", "spacex", "streetwear", "TalesFromRetail", "survivor", "gamegrumps", "SanJoseSharks", "india", "kpop", "FellowKids", "roosterteeth", "GifRecipes", "heroesofthestorm", "sweden", "fireemblem", "guns", "toronto", "quityourbullshit", "LiverpoolFC", "anime_irl", "RoastMe", "manga", "Justrolledintotheshop", "tumblr", "EliteDangerous", "osugame", "ClashRoyale", "ukpolitics", "DnD", "halo", "Warframe", "whowouldwin", "PublicFreakout", "exmormon", "rickandmorty", "mildlyinfuriating", "france", "Drugs", "Roadcam", "witcher", "iamverysmart", "polandball", "penguins", "teenagers", "warriors", "ShitRedditSays", "Undertale", "TwoBestFriendsPlay", "TheRedPill", "OkCupid", "Battleborn", "runescape", "brasil", "MapPorn", "DCcomics", "Kappa", "ireland", "Conservative", "sysadmin", "ProgrammerHumor", "Libertarian", "RWBY", "nonononoyes", "chicago", "Seattle", "Rainbow6", "AnimalsBeingJerks", "HumansBeingBros", "Portland", "FULLCOMMUNISM", "Whatcouldgowrong", "circlejerk", "NoStupidQuestions", "niceguys", "argentina", "lego", "photography", "firstworldanarchists", "Steam", "thewalkingdead", "shutupandtakemymoney", "QuotesPorn", "LearnUselessTalents", "dadjokes", "everymanshouldknow", "DoesAnybodyElse", "travel", "learnprogramming", "doctorwho", "holdmybeer", "Economics", "scifi", "changemyview", 
"harrypotter", "bodyweightfitness", "DepthHub", "seduction", "PerfectTiming", "AlienBlue", "breakingbad", "youdontsurf", "BuyItForLife", "linux", "psychology", "fullmoviesonyoutube", "UnexpectedThugLife", "SkincareAddiction", "slowcooking", "business", "frugalmalefashion", "pettyrevenge", "offmychest", "investing", "battlestations", "running", "whatisthisthing", "Entrepreneur", "outside", "standupshots", "CrazyIdeas", "minimalism", "itookapicture", "Eyebleach", "MURICA", "NoFap", "keto", "Bitcoin", "starcraft", "beer", "instant_regret", "AnimalsBeingBros", "FanTheories", "Guitar", "progresspics", "KenM", "reallifedoodles", "IWantToLearn", "writing", "entertainment", "GrandTheftAutoV", "bicycling", "creepyPMs", "DeepIntoYouTube", "getdisciplined", "ArtisanVideos", "chemicalreactiongifs", "Damnthatsinteresting", "Meditation", "behindthegifs", "iphone", "gamedev", "tipofmytongue", "LadyBoners", "wallpaper", "recipes", "howto", "ZeroWaste", "explainlikeIAmA", "600euro", "lootcratespoilers", "MealPrepSunday", "highlevelkarma", "Wetshaving", "fashionsouls", "mtgoxinsolvency", "RPDRDRAMA", "WorldofTanksConsole", "metal_me_irl", "OnBenchNow", "The_Farage", "boostedboards", "avengersacademygame", "superleague", "iamverybadass", "TheInnBetween", "ethereum", "battlewagon", "gimlet", "minipainting", "NarutoFanfiction", "roleplayponies", "DatBoi", "SovietWomble", "PNWS", "maisiewilliams", "Thisismylifemeow", "ggggg", "TalesofLink", "maker", "Triangl", "headsupbaseball", "badukpolitics", "instantbarbarians", "SovereigntyAscending", "GCSE", "goodlongposts", "uncharted", "teenmom", "shittankiessay", "BirdsForScale", "shitguncontrollerssay", "Slitherio", "SpaceXMasterrace", "neckbeardstories", "Ice_Poseidon", "dankmemes", "pokemongo", "VaporwaveArt", "thegrandtour", "TopMindsOfReddit", "popheads", "metametacanada", "AnimalsBeingConfused", "dank_meme", "antinatalism", "hamiltonmusical", "GGFreeForAll", "hcteams", "Devoted", "TheSilphRoad", "ik_ihe", "RobinHood", "oddlyterrifying", 
"HomeDepot", "TurtleFacts", "cleganebowl", "BeautyGuruChat", "StevenAveryIsGuilty", "GalaxyS7", "battlefield_one", "totallynotrobots", "MandelaEffect", "raisedbyborderlines", "awardtravel", "DMDadJokes", "SVU", "GIRLSundPANZER", "FullScorpion", "MoonriseMusicFestival", "StarlightStage", "kurdistan", "BadRocketLeagueGoals", "redditmobile", "Thunder", "sjwhate", "amazonecho", "IronThronePowers", "europeannationalism", "RedPillWives", "WhitePeopleTwitter", "ImaginaryLeviathans", "GreenDawn", "TownofSalemgame", "Bestbuy", "awfuleyebrows", "holdmycatnip", "CrackStatus", "xmen", "NashvilleTV", "YookaLaylee", "paragon", "Trucking", "MouseReview", "yourmomshousepodcast", "LateStageCapitalism", "CowChop", "hapas", "FCCincinnati", "FantasyWarTactics", "AnybodyButHillary", "UndertaleCringe", "BleachBraveSouls", "gifsthatendtoosoon", "Skookum", "houston", "Austin", "radiohead", "Christianity", "vegan", "electronic_cigarette", "confession", "MensRights", "Guildwars2", "Drama", "Warthunder", "noveltranslations", "syriancivilwar", "opieandanthony", "Philippines", "britishproblems", "canucks", "starcitizen", "AFL", "homestuck", "ffxiv", "vancouver", "wallstreetbets", "DC_Cinematic", "teslamotors", "Sneakers", "Denmark", "indieheads", "shittyfoodporn", "Amd", "boston", "newsokur", "Mariners", "funhaus", "newzealand", "circlebroke", "trippinthroughtime", "GamerGhazi", "techsupportgore", "natureismetal", "askgaybros", "OSHA", "tennis", "FIFA", "Planetside", "whitepeoplegifs", "eu4", "raisedbynarcissists", "AskScienceFiction", "MechanicalKeyboards", "blackops3", "socialism", "Romania", "mylittlepony", "nrl", "BabyBumps", "CODZombies", "dayz", "thenetherlands", "TampaBayLightning", "Metal", "nyc", "awwnime", "ArcherFX", "IASIP", "Calgary", "clevelandcavs", "xxfitness", "paradoxplaza", "bleach", "CombatFootage", "Suomi", "weddingplanning", "bjj", "darksouls", "ShitAmericansSay", "nintendo", "UnresolvedMysteries", "UNBGBBIIVCHIDCTIICBG", "TheBluePill", "blackdesertonline", 
"torontoraptors", "MilitaryPorn", "BravoRealHousewives", "ColorizedHistory", "cosplaygirls", "Supernatural", "Foodforthought", "woodworking", "netsec", "LetsNotMeet", "fantasyfootball", "southpark", "firstworldproblems", "GamePhysics", "math", "web_design", "adventuretime", "CityPorn", "wikipedia", "oldpeoplefacebook", "howtonotgiveafuck", "LucidDreaming", "ContagiousLaughter", "Astronomy", "worldpolitics", "shittyreactiongifs", "Diablo", "environment", "electronicmusic", "shittyrobots", "futurama", "community", "zelda", "TheLastAirbender", "cosplay", "self", "theydidthemath", "buildapcsales", "educationalgifs", "nostalgia", "boardgames", "IdiotsFightingThings", "nononono", "gardening", "truegaming", "batman", "carporn", "montageparodies", "depression", "PenmanshipPorn", "TopGear", "Physics", "Design", "fitmeals", "nocontext", "FloridaMan", "socialskills", "KerbalSpaceProgram", "WeAreTheMusicMakers", "financialindependence", "SubredditSimulator", "femalefashionadvice", "Python", "rage", "ANormalDayInRussia", "Homebrewing", "CasualConversation", "ThingsCutInHalfPorn", "AMA", "WastedGifs", "PrettyGirls", "Coffee", "gif", "TheSimpsons", "relationship_advice", "Frisson", "CampingandHiking", "Pareidolia", "3DS", "subredditoftheday", "jailbreak", "webdev", "Health", "HumanPorn", "dogs", "asmr", "blunderyears", "calvinandhobbes", "ExpectationVsReality", "RetroFuturism", "TorontoMarlies", "slideforreddit", "SanctionedSuicide", "IDontWorkHereLady", "FUTMobile", "TeamSolomid", "fargo", "GTAorRussia", "BokuNoHeroAcademia", "discordapp", "twice", "Daredevil", "esports", "TrueOffMyChest", "JusticeServed", "SoulsSliders", "askTO", "hottiesfortrump", "DesirePath", "forwardsfromreddit", "pyrocynical", "ZeroEscape", "Instantregret", "drunkenpeasants", "BeAmazed", "KylieJenner", "savedyouaclick", "orphanblack", "ultrawidemasterrace", "HelloInternet", "SummerReddit", "FiestaST", "southamerica", "ForeverUnwanted", "ConservativesOnly", "TooMeIrlForMeIrl", "BrasildoB", 
"customhearthstone", "batty", "Rivenmains", "GaryJohnson", "askhillarysupporters", "pennystocks", "scamp", "Xiaomi", "JUSTNOFAMILY", "Weakendgunnit", "brasilivre", "BatFacts", "mitchjones", "lastimages", "GradeAUnderA", "proED", "IHE", "RealLifeFootball", "sticker", "HiTMAN", "WormFanfic", "badmathematics", "ageofsigmar", "7kglobal", "sexyhair", "NotMyJob", "TumblrPls", "RunningCirclejerk", "misanthropy", "Cr1TiKaL", "PlanetCoaster", "awfuleverything", "SympatheticMonsters", "seinfeldgifs", "shitpostemblem", "criticalrole", "CaptainAmerica", "teefies", "bitsoup", "UnisonLeague", "aSongOfMemesAndRage", "FashionReps", "Weakpots", "depaul", "DicePorn", "churning", "nbastreams", "AskTrumpSupporters", "ChanceTheRapper", "CLG", "Eesti", "PennyDreadful", "AskPhysics", "TsumTsum", "Tiresaretheenemy", "reactjs", "theflash", "GFD", "BadSocialScience", "ttcafterloss", "shittydarksouls", "thebachelor", "zootopia", "DBZDokkanBattle", "SonyAlpha", "xTrill", "ApocalypseRising", "comicbookcollecting", "cablefail", "ErinAshford", "CatsAreAssholes", "unpopularopinion", "TheCinemassacre", "Hillary", "Military", "stlouisblues", "Kanye", "golf", "btc", "Shitty_Car_Mods", "amiugly", "ottawa", "Atlanta", "bestoflegaladvice", "DunderMifflin", "actuallesbians", "thisismylifenow", "playrust", "summonerschool", "LosAngeles", "windowsphone", "feedthebeast", "AsianBeauty", "Cricket", "podemos", "Parenting", "breakingmom", "Yogscast", "ClashOfClans", "Torontobluejays", "melbourne", "ProRevenge", "hardbodies", "london", "redsox", "GlobalOffensiveTrade", "Gamingcirclejerk", "fivenightsatfreddys", "islam", "lewronggeneration", "CollegeBasketball", "asktransgender", "fantasybaseball", "dbz", "CrusaderKings", "BestOfOutrageCulture", "Anarcho_Capitalism", "sanfrancisco", "StreetFighter", "blackpeoplegifs", "medicalschool", "japan", "im14andthisisdeep", "pussypassdenied", "ProtectAndServe", "startrek", "StardustCrusaders", "army", "italy", "chelseafc", "philadelphia", "WorldOfWarships", 
"WorldofTanks", "nevertellmetheodds", "Watches", "Naruto", "AirForce", "CHICubs", "bindingofisaac", "Denver", "kpics", "StarWarsBattlefront", "badwomensanatomy", "StartledCats", "SiliconValleyHBO", "portugal", "CoDCompetitive", "muacjdiscussion", "PuzzleAndDragons", "corgi", "ShitWehraboosSay", "sydney", "masseffect", "lgbt", "drunk", "Cinemagraphs", "engineering", "netflix", "ZenHabits", "coolguides", "Glitch_in_the_Matrix", "arresteddevelopment", "Awwducational", "beermoney", "cordcutters", "beards", "quotes", "raspberry_pi", "gamernews", "fatpeoplestories", "startups", "MadeMeSmile", "Psychonaut", "somethingimade", "DesignPorn", "CitiesSkylines", "ImaginaryLandscapes", "productivity", "redditgetsdrawn", "foodhacks", "AskCulinary", "compsci", "skeptic", "DecidingToBeBetter", "lotr", "ChildrenFallingOver", "rpg", "EDC", "notinteresting", "betterCallSaul", "Fantasy", "perfectloops", "classic4chan", "friendsafari", "unexpectedjihad", "budgetfood", "Buddhism", "lolcats", "hacking", "casualiama", "horror", "ImaginaryMonsters", "battlefield_4", "TrueFilm", "Paranormal", "vinyl", "answers", "babyelephantgifs", "r4r", "yoga", "graphic_design", "Cyberpunk", "tech", "happy", "WebGames", "delusionalartists", "literature", "urbanexploration", "wiiu", "HIMYM", "javascript", "hardware", "diablo3", "techsupport", "gainit", "dubstep", "Justfuckmyshitup", "ofcoursethatsathing", "WhatsInThisThing", "thalassophobia", "onetruegod", "MarvelPuzzleQuest", "hockeyquestionmark", "reckful", "summonerswar", "ask_transgender", "ContestOfChampions", "YasuoMains", "EliteOne", "aznidentity", "TheAdventureZone", "hdtgm", "ArenaHS", "engrish", "DankNation", "SCJerk", "FurryKikPals", "Rengarmains", "rantgrumps", "roblox", "rpdrcirclejerk", "AskBernieSupporters", "TrueAnime", "incest_relationships", "Jeep", "RainbowSixSiege", "saltandsanctuary", "Animemes", "kindafunny", "HelpMeFind", "bangtan", "DebateFascism", "GenderCritical", "wwesupercard", "benzodiazepines", "kitchener", "TheMassive", 
"LivestreamFail", "RedHotChiliPeppers", "funfacts", "Veep", "IdiotsInCars", "Jeopardy", "VaporwaveAesthetics", "battlecats", "IgnorantImgur", "JustEngaged", "subnautica", "communism101", "collegebaseball", "LigaMX", "LoveLive", "bikewrench", "BostonTerrier", "panicatthedisco", "CharacterRant", "liberta", "UnearthedArcana", "ebikes", "AgainstHateSubreddits", "Nexus6P", "Kerala", "BeachCity", "breathinginformation", "kronos2wow", "6thForm", "TwoSentenceHorror", "vexillology", "NeverTrump", "wowservers", "40kLore", "DnDBehindTheScreen", "MLBStreams", "ArenaFPS", "BigCatGifs", "BetterEveryLoop", "jillstein", "Nisekoi", "peloton", "sugarlifestyleforum", "badpolitics", "trump", "FitAndNatural", "SoundsLikeMusic", "danganronpa", "sadcringe", "PrimitiveTechnology", "MGTOW", "forsen", "headpats", "Porsche", "3dshacks", "Staples", "OliviaMunn", "CFA", "mirrorsedge", "GGdiscussion", "Gender_Critical", "StuffOnCats", "funkopop", "badeconomics", "MLBTheShow", "Knife_Swap", "vainglorygame", "pebble", "grilling", "astoria", "PeopleFuckingDying", "LatinaCuties", "Scotland", "shitpost", "Feminism", "singapore", "Bad_Cop_No_Donut", "SSBM", "aviation", "counting", "bladeandsoul", "subaru", "NASCAR", "washingtondc", "HunterXHunter", "StardewValley", "leafs", "CFBOffTopic", "TrollYChromosome", "gaybros", "CanadaPolitics", "bravefrontier", "ForeverAlone", "sandiego", "muacirclejerk", "lakers", "MLPLounge", "cscareerquestions", "pitbulls", "dwarffortress", "hawks", "Patriots", "Terraria", "Cynicalbrit", "JonTron", "Boxing", "TheAmericans", "fountainpens", "swtor", "Aquariums", "badtattoos", "Autos", "furry", "stopdrinking", "starterpacks", "minnesotavikings", "FFRecordKeeper", "croatia", "metalgearsolid", "Defenders", "rugbyunion", "circlebroke2", "MEOW_IRL", "AskAnAmerican", "BigBrother", "Anarchism", "dndnext", "elderscrollsonline", "OopsDidntMeanTo", "EDH", "bayarea", "PKA", "ElectricForest", "yugioh", "Catholicism", "EnoughLibertarianSpam", "weightroom", "google", "Paleo", 
"wowthissubexists", "InteriorDesign", "tldr", "webcomics", "wicked_edge", "Baking", "androidapps", "dogpictures", "Anxiety", "PandR", "mashups", "misleadingthumbnails", "Pizza", "drawing", "jobs", "simpleliving", "FancyFollicles", "Filmmakers", "dogecoin", "climbing", "Survival", "AnimalsBeingDerps", "biology", "bertstrips", "DarkSouls2", "Wellthatsucks", "Marijuana", "finance", "TrueAskReddit", "EmmaWatson", "CatsStandingUp", "TheWayWeWere", "Enhancement", "combinedgifs", "AntiJokes", "toosoon", "Heavymind", "skateboarding", "firefly", "ketorecipes", "PropagandaPosters", "worldbuilding", "backpacking", "patientgamers", "TinyHouses", "RedditLaqueristas", "Nootropics", "SocialEngineering", "linguistics", "StandUpComedy", "realasians", "architecture", "ExposurePorn", "Sherlock", "malelivingspace", "ShittyLifeProTips", "cogsci", "coding", "ArchitecturePorn", "Rainmeter", "edmproduction", "NeutralPolitics", "Prematurecelebration", "medicine", "malelifestyle", "popping", "shortscarystories", "secretsanta", "languagelearning", "headphones", "AndroidGaming", "tea", "AdrenalinePorn", "StonerEngineering", "Graffiti", "GTAV", "memes", "Borderlands", "OpTicGaming", "Repsneakers", "InjusticeMobile", "copypasta", "yuruyuri", "PoliticalVideo", "WhiteWolfRPG", "walmart", "FFXV", "LegendsOfTomorrow", "sixers", "atletico", "iranian", "metaljerk", "Bushcraft", "CAguns", "askportland", "canadients", "Vaporwave", "birding", "uberdrivers", "msp", "deepweb", "TimCanova", "GameStop", "Colorization", "ChicagoSuburbs", "PSVR", "unrealtournament", "Addons4Kodi", "Polska", "Juve", "FocusST", "ERB", "ptcgo", "caseyneistat", "40kOrkScience", "samharris", "CCJ2", "Storyshift", "WeatherGifs", "Brunei", "Canadian_ecigarette", "Multicopter", "sales", "instantkarma", "FutureFight", "sophieturner", "Vaping101", "iZombie", "shittyprogramming", "ElderScrolls", "crappyoffbrands", "GearVR", "FalloutMods", "newtothenavy", "CompetitiveHS", "steamr", "SchoolIdolFestival", "Wicca", "PhotoshopRequest", 
"tipofmyjoystick", "greece", "NBA_Draft", "illegaltorrents", "snapchat", "dontdeadopeninside", "macdemarco", "AyyMD", "Chattanooga", "overlord", "BellaThorne", "rollercoasters", "CherokeeXJ", "homegym", "youtube", "Brampton", "pkmntcg", "Waxpen", "republicwireless", "ptsd", "hardwareswap", "galaxynote5", "ProductTesting", "MAA2", "RetroPie", "changelog", "araragigirls", "killerinstinct", "FilthyFrank", "warcraftlore", "NorthKoreaNews", "emacs", "linuxmasterrace", "socialjustice101", "RCSources", "animelegs", "nihilism", "InfertilityBabies", "Berserk", "ChoosingBeggars", "podcasts", "ukulele", "Wrangler", "SakuraGakuin", "CastleClash", "ShokugekiNoSoma", "newsokunomoral", "NewYorkMets", "Rabbits", "treeofsavior", "coys", "discgolf", "MonsterHunter", "softwaregore", "shield", "DeadBedrooms", "Battlefield", "rocketbeans", "spaceengineers", "Negareddit", "AdamCarolla", "WrestleWithThePlot", "poker", "supremeclothing", "SWGalaxyOfHeroes", "China", "Dallas", "promos", "Earwolf", "FinalFantasy", "PurplePillDebate", "KitchenConfidential", "Dodgers", "Edmonton", "Fishing", "exjw", "LSD", "TalesFromYourServer", "ukraina", "KCRoyals", "Pathfinder_RPG", "opiates", "mexico", "howardstern", "Metalcore", "Windows10", "korea", "Gotham", "4PanelCringe", "blackladies", "powerlifting", "SFGiants", "Simulated", "splatoon", "woof_irl", "survivetheculling", "DebateReligion", "starbucks", "Blep", "Monstercat", "3Dprinting", "UkrainianConflict", "Vaping", "TheDepthsBelow", "chemistry", "EverythingScience", "selfimprovement", "classicalmusic", "theocho", "MachinePorn", "MoviePosterPorn", "SWARJE", "HailCorporate", "PS3", "MakingaMurderer", "HomeImprovement", "HouseOfCards", "FifthWorldPics", "Punny", "shittyadvice", "IndieGaming", "networking", "redditsync", "DadReflexes", "ThriftStoreHauls", "xkcd", "cableporn", "bloodborne", "awesome", "MachineLearning", "Perfectfit", "disney", "VolleyballGirls", "nutrition", "arduino", "astrophotography", "michaelbaygifs", "Ubuntu", "privacy", 
"futurebeats", "badhistory", "torrents", "economy", "musictheory", "TheGirlSurvivalGuide", "audiophile", "swoleacceptance", "catpranks", "fifthworldproblems", "swedishproblems", "Twitch", "techsupportmacgyver", "malehairadvice", "snowboarding", "TrueDetective", "trap", "Jazz", "lifeofnorman", "illusionporn", "svenskpolitik", "waterporn", "eatsandwiches", "electronics", "AskSocialScience", "AppHookup", "windows", "Offensive_Wallpapers", "eFreebies", "forhire", "TheoryOfReddit", "AnimalPorn", "ImaginaryTechnology", "trackers", "undelete", "camping", "Infographics", "EDM", "crafts", "EngineeringPorn", "longboarding", "KarmaConspiracy", "TrueAtheism", "LearnJapanese", "learnpython", "steamdeals", "NotTimAndEric", "audioengineering", "EngineeringStudents", "introvert", "1200isplenty", "ArtefactPorn", "awwwtf", "aliens", "AskDocs", "dancemoms", "INDYCAR", "wargame", "greenday", "Mcat", "CompTIA", "Advice", "BPD", "factorio", "Brawlhalla", "norfolk", "researchchemicals", "civ5", "ExNoContact", "supergirlTV", "CK2GameOfthrones", "TryingForABaby", "security", "ThisIsNotASafeSpace", "HBOGameofThrones", "RoversMorningGlory", "Buffalo", "weed", "Emo", "gamindustri", "ROTC", "djiphantom", "megalinks", "AskEurope", "boogie2988", "ScenesFromAHat", "CraftBeer", "trucksim", "Authentic_Vaping", "Paladins", "landscaping", "comedybangbang", "sociopath", "pagan", "subredditcancer", "southafrica", "MostBeautiful", "MasterchefAU", "ftm", "SS13", "datascience", "AccidentalRacism", "DiWHY", "likeus", "ABCDesis", "mazda3", "RedvsBlue", "galatasaray", "veganfitness", "dubai", "woweconomy", "fortwayne", "chile", "BeautifulFemales", "WildernessBackpacking", "biggestproblem", "osureport", "ProjectFi", "marriedredpill", "RedLetterMedia", "FlarrowPorn", "whatsthisrock", "Albany", "JessicaJones", "Drumpf", "Trophies", "submechanophobia", "ibs", "onebag", "exchristian", "razer", "AcademicBiblical", "vita", "beyondthebump", "HFY", "Firearms", "nursing", "PoliticalHumor", "linux_gaming", "baltimore", 
"NYYankees", "HalfLife", "Xcom", "crossfit", "uwaterloo", "playark", "civbattleroyale", "spikes", "SketchDaily", "Columbus", "ainbow", "Braves", "dragonage", "Winnipeg", "pittsburgh", "moviescirclejerk", "mallninjashit", "Flyers", "electricdaisycarnival", "TexasRangers", "daddit", "crochet", "weekendgunnit", "badphilosophy", "DenverBroncos", "Turkey", "rockets", "bonnaroo", "Seahawks", "Random_Acts_Of_Amazon", "BitcoinMarkets", "OutreachHPG", "GreenBayPackers", "BMW", "BABYMETAL", "belgium", "GrassrootsSelect", "Rowing", "nashville", "Warhammer40k", "WarshipPorn", "The100", "KingdomHearts", "CapitalismVSocialism", "CalamariRaceTeam", "Cubers", "bostonceltics", "EnterTheGungeon", "Cardinals", "chess", "CGPGrey", "alberta", "exmuslim", "Accounting", "SSBPM", "Barca", "CCW", "nhl", "booksuggestions", "csgobetting", "catpictures", "AnimalCrossing", "AdPorn", "powerwashingporn", "retiredgif", "ObscureMedia", "newreddits", "LoLeventVoDs", "stocks", "Piracy", "IWantOut", "Chromecast", "facebookwins", "bannedfromclubpenguin", "energy", "mechanical_gifs", "BollywoodRealism", "lockpicking", "MeanJokes", "mac", "RelayForReddit", "ifyoulikeblank", "creepypasta", "starbound", "marketing", "hiking", "java", "microgrowery", "DestructionPorn", "GifSound", "homestead", "trailerparkboys", "serialpodcast", "GunPorn", "sloths", "soccerstreams", "UFOs", "AskEngineers", "SkyPorn", "alternativeart", "greentext", "gallifrey", "Thetruthishere", "ADHD", "ExplainLikeImCalvin", "androiddev", "RandomActsOfGaming", "knives", "tall", "vaporents", "conspiratard", "JapaneseGameShows", "Fireteams", "ArtPorn", "RedditDayOf", "InfrastructurePorn", "Scotch", "3amjokes", "coloringcorruptions", "smallbusiness", "education", "vegetarian", "FreeEBOOKS", "solotravel", "punchablefaces", "VillagePorn", "skyrimmods", "guitarlessons", "gopro", "MTB", "eldertrees", "speedrun", "Cheap_Meals", "typography", "bitchimabus", "glitch_art", "happycrowds", "C25K", "androidthemes", "HybridAnimals", "assassinscreed", 
"excel", "readit", "Screenwriting", "ABraThatFits", "Images", "twitchplayspokemon", "Austria", "UCSD", "Guiltygear", "babybigcatgifs", "chromeos", "UtahJazz", "AmateurRoomPorn", "GenderCynical", "Target", "DerekSmart", "HomeNetworking", "HongKong", "TameImpala", "eliteexplorers", "CatTaps", "faeria", "alexandradaddario", "tressless", "Swingers", "confessions", "Kirby", "watercooling", "onewordeach", "Monitors", "norge", "Barcelona", "blackcats", "bassfishing", "deOhneRegeln", "Boise", "nexus5x", "lolwat", "Parahumans", "MensLib", "steak", "cataclysmdda", "thefighterandthekid", "Harley", "KimKardashianPics", "pokemontrades", "asoiafcirclejerk", "TalesFromTheFrontDesk", "slavelabour", "kancolle", "Makeup", "JETProgramme", "samsung", "phillies", "InterdimensionalCable", "Drag", "ableton", "Padres", "SocialJusticeInAction", "blogsnark", "csgo", "Nerf", "HeroesandGenerals", "consulting", "fitbit", "vegas", "barstoolsports", "transgendercirclejerk", "badroommates", "mentalhealth", "Audi", "statuegropers", "Musicthemetime", "WonderTrade", "NintendoNX", "sto", "askwomenadvice", "Firewatch", "learntodraw", "CompoundMedia", "GolfGTI", "makeyourchoice", "AirBnB", "powerrangers", "fragrance", "Gameboy", "Infinitewarfare", "kittens", "NoSleepOOC", "amateur_boxing", "custommagic", "askcarsales", "ToyotaTacoma", "ARKone", "celeb_redheads", "WordAvalanches"];
// Start a scrape of every configured subreddit: first page (page 1) with an
// empty "after" cursor. All scrapes are started back to back without waiting.
subreddits.forEach(function (name) {
    scrape_hot("", 1, name);
});
/**
 * Fetch one page of a subreddit's listing JSON, append every not-yet-seen post
 * to posts.csv, hand each new post's author to scrape_user(), and continue
 * with the next page.
 *
 * Paging stops after 40 pages, or as soon as the response carries no "after"
 * cursor (previously a null cursor was sent back as "?after=null" until the
 * page limit was hit).
 *
 * A failed request or an unparseable/unexpected response is retried for the
 * same page with an increasing delay, at most MAX_ATTEMPTS times; the old
 * code retried immediately and without limit, so a subreddit whose response
 * never contains a listing caused an endless request loop.
 *
 * @param {string} after     Listing cursor to continue from ("" for the first page).
 * @param {number} page      1-based page counter; only used for the page limit.
 * @param {string} subreddit Subreddit name without the "/r/" prefix.
 * @param {number} [attempt] Number of failed tries already made for this page (default 0).
 */
function scrape_hot(after, page, subreddit, attempt) {
    var MAX_PAGES = 40;
    var MAX_ATTEMPTS = 5;
    var RETRY_DELAY_MS = 2000;
    var tries = attempt || 0;

    // Log the failure and schedule another try for the same page, unless the
    // attempt budget for this page is used up.
    function retry(reason) {
        console.log(reason);
        if (tries + 1 >= MAX_ATTEMPTS) {
            console.log("Giving up on r/" + subreddit + " page " + page);
            return;
        }
        setTimeout(function () {
            scrape_hot(after, page, subreddit, tries + 1);
        }, RETRY_DELAY_MS * (tries + 1));
    }

    // Write one listing entry to posts.csv unless its ID was already recorded.
    function record_post(child) {
        var post = child.data;
        if (post_array.indexOf(post.id) !== -1) {
            return;
        }
        post_array.push(post.id);
        var line = post.author + ',' + post.id + ',' +
            format_date(post.created) + ',' + post.num_comments + ',' +
            post.score + ',' + post.stickied + ',' +
            'hot,' + subreddit + "\n";
        console.log(line);
        // get/write user information
        scrape_user(post.author);
        fs.appendFile('posts.csv', line, function (err) {
            if (err) {
                console.log(err);
            }
        });
    }

    var url = "https://www.reddit.com/r/" + encodeURIComponent(subreddit) +
        "/.json?after=" + encodeURIComponent(after);
    https.get(url, function (res) {
        var body = '';
        res.on('data', function (chunk) {
            body += chunk;
        });
        res.on('end', function () {
            var listing;
            try {
                listing = JSON.parse(body).data;
                if (!listing || !Array.isArray(listing.children)) {
                    throw new Error("Unexpected response for " + url);
                }
                // Already-recorded posts are skipped, so a retry after a
                // failure part-way through does not duplicate rows.
                listing.children.forEach(record_post);
            } catch (err) {
                retry(err);
                return;
            }
            // A missing/null cursor means there is no further page.
            if (page < MAX_PAGES && listing.after) {
                scrape_hot(listing.after, page + 1, subreddit);
            }
        });
    }).on('error', function (e) {
        retry(e);
    });
}
/**
 * Record a user's "created" date in users.csv, at most once per user name.
 *
 * The name is added to user_array before the request is made so that
 * concurrent callers do not trigger duplicate lookups.
 *
 * @param {string} user Reddit user name as it appears in a post's author field.
 */
function scrape_user(user) {
    if (user_array.indexOf(user) !== -1) {
        return;
    }
    user_array.push(user);
    fetch_user_created(user, 0);
}

/**
 * Private helper for scrape_user(): request /user/<name>/about.json and append
 * "<name>,<formatted created date>" to users.csv.
 *
 * Retries live here rather than in scrape_user() because the user is already
 * in user_array by the time a request fails, so calling scrape_user() again
 * returned immediately and the user was never written. Retries are delayed
 * and limited to MAX_ATTEMPTS requests per user.
 *
 * @param {string} user    Reddit user name.
 * @param {number} attempt Number of failed tries already made for this user.
 */
function fetch_user_created(user, attempt) {
    var MAX_ATTEMPTS = 5;
    var RETRY_DELAY_MS = 2000;

    // Log the failure and schedule another request unless the budget is used up.
    function retry(reason) {
        console.log("Got an error: ", reason);
        if (attempt + 1 >= MAX_ATTEMPTS) {
            console.log("Giving up on user " + user);
            return;
        }
        setTimeout(function () {
            fetch_user_created(user, attempt + 1);
        }, RETRY_DELAY_MS * (attempt + 1));
    }

    var url = "https://www.reddit.com/user/" + encodeURIComponent(user) + "/about.json";
    https.get(url, function (res) {
        var body = '';
        res.on('data', function (chunk) {
            body += chunk;
        });
        res.on('end', function () {
            var line;
            try {
                // Throws when the body is not JSON or has no "data" object.
                line = user + "," + format_date(JSON.parse(body).data.created) + "\n";
            } catch (err) {
                retry(err);
                return;
            }
            fs.appendFile('users.csv', line, function (err) {
                if (err) {
                    console.log(err);
                }
            });
        });
    }).on('error', function (e) {
        retry(e);
    });
}
/**
 * Turn a Unix timestamp given in seconds into a "YYYY-MM-DD" string.
 * The calendar fields are read with the local-time getters, so the result
 * depends on the time zone of the running process.
 *
 * @param {number} date Seconds since the Unix epoch.
 * @returns {string} Year, zero-padded month and zero-padded day joined by "-".
 */
function format_date(date) {
    var when = new Date(date * 1000);
    // Left-pad single-character values with one "0".
    var two_digits = function (value) {
        var text = String(value);
        return text.length < 2 ? "0" + text : text;
    };
    return [
        String(when.getFullYear()),
        two_digits(when.getMonth() + 1), // getMonth() is zero-based
        two_digits(when.getDate())
    ].join("-");
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment