Last active
December 14, 2015 02:39
-
-
Save dmarx/5015605 to your computer and use it in GitHub Desktop.
Users were invited to state the next number in a sequence. I was interested in seeing which numbers were skipped or repeated. Resultant graph from scrape at 2013-2-22 13:34 EST: http://i.imgur.com/yOFOfIX.png
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import praw | |
| from collections import Counter | |
| import pandas as pd | |
| import matplotlib.pyplot as plt | |
# User-agent string handed to the Reddit client constructed below.
useragent='investigating a RAOA post'
# Module-level PRAW client; get_comments() uses it to look up submissions.
r = praw.Reddit(useragent)
def get_comments(subm_id='190wmg'):
    """Return the body text of every comment on a submission.

    The submission is looked up by ``subm_id`` through the module-level
    ``r`` client, and the ``body`` attribute of each item yielded by the
    submission's ``all_comments_flat`` is collected in iteration order.
    """
    submission = r.get_submission(submission_id=subm_id)
    return [comment.body for comment in submission.all_comments_flat]
def get_vals(comments, prefix_len=5):
    """Count the integer that each comment starts with.

    Only the first ``prefix_len`` characters of a comment are examined.
    That candidate is shortened from the right, one character at a time
    (stripping surrounding whitespace after each cut), until ``int()``
    accepts it or nothing is left.  Comments that yield no integer are
    ignored.

    Args:
        comments: iterable of comment strings.
        prefix_len: how many leading characters of each comment to
            consider; numbers longer than this are truncated to it.

    Returns:
        A ``Counter`` mapping each parsed integer to the number of
        comments it was parsed from.
    """
    sequence = Counter()
    for comment in comments:
        candidate = comment[:prefix_len]
        while candidate:
            try:
                value = int(candidate)
            except ValueError:
                # Not a number yet: drop the last character and retry.
                candidate = candidate[:-1].strip()
            else:
                sequence[value] += 1
                break
    return sequence
def plot_hist(vals):
    """Bar-plot how often each number occurred and report the gaps.

    Args:
        vals: mapping of integer -> count (for example the ``Counter``
            produced by ``get_vals``).

    Returns:
        The integers in ``1..max(key)`` that are not keys of ``vals``,
        in ascending order.  An empty list when ``vals`` is empty, in
        which case nothing is plotted.
    """
    s = pd.Series(vals)
    if s.empty:
        # An empty index has no usable maximum for the int() conversion
        # below, and there is nothing to draw.
        return []
    upper = int(s.index.max())
    missing = [i for i in range(1, upper + 1) if i not in s.index]
    if missing:
        # Give every skipped number an explicit zero-height bar.
        s = s.add(pd.Series(0, index=missing), fill_value=0)
    # Keep the bars in numeric order even when no padding was added.
    s = s.sort_index()
    s.plot(kind='bar')
    plt.show()
    return missing
def main():
    """Fetch the default thread's comments, tally their numbers, plot them.

    Returns:
        The list of missing numbers returned by ``plot_hist``.
    """
    comments = get_comments()
    vals = get_vals(comments)
    return plot_hist(vals)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment