Skip to content

Instantly share code, notes, and snippets.

@dmarx
Last active December 14, 2015 02:39
Show Gist options
  • Select an option

  • Save dmarx/5015605 to your computer and use it in GitHub Desktop.

Select an option

Save dmarx/5015605 to your computer and use it in GitHub Desktop.
Users were invited to state the next number in a sequence. I was interested in seeing which numbers were skipped or repeated. Resulting graph from a scrape at 2013-02-22 13:34 EST: http://i.imgur.com/yOFOfIX.png
import praw
from collections import Counter
import pandas as pd
import matplotlib.pyplot as plt
# String handed to praw.Reddit as its first argument (the client's user agent).
useragent='investigating a RAOA post'
# Module-level praw client; get_comments() looks submissions up through it.
r = praw.Reddit(useragent)
def get_comments(subm_id='190wmg'):
    """Return the body text of every comment on a reddit submission.

    The submission is looked up through the module-level praw client ``r``
    and its ``all_comments_flat`` listing is walked once.

    subm_id -- id of the submission to fetch (defaults to '190wmg').

    Returns a list with the ``body`` of each comment, in the order the
    listing yields them.
    """
    submission = r.get_submission(submission_id=subm_id)
    return [comment.body for comment in submission.all_comments_flat]
def get_vals(comments):
    """Tally the integer that each comment starts with.

    Only the first five characters of a comment are examined. That prefix
    is passed to ``int()``; if it does not parse, the last character is
    dropped (and surrounding whitespace stripped) and the shorter prefix is
    tried again, until something parses or nothing is left. Comments that
    never yield an integer are ignored.

    comments -- iterable of comment strings.

    Returns a ``collections.Counter`` mapping each integer found to the
    number of comments that started with it.
    """
    sequence = Counter()
    for c in comments:
        candidate = c[:5]
        while candidate:
            try:
                val = int(candidate)
            except ValueError:
                # Trailing text ("12 is", "7!") blocks the parse: shave one
                # character off the end and retry with the shorter prefix.
                candidate = candidate[:-1].strip()
            else:
                sequence[val] += 1
                break
    return sequence
def plot_hist(vals):
    """Bar-plot how often each number was posted and report the gaps.

    vals -- mapping of integer -> count (e.g. the Counter from get_vals()).

    Every integer from 1 up to the largest key that is absent from ``vals``
    is added to the plot with a count of 0, so skipped numbers show up as
    empty slots. Bars are drawn in ascending key order and the figure is
    displayed with ``plt.show()``.

    Returns the list of missing integers in ascending order. An empty
    ``vals`` yields ``[]`` and no figure, since there is nothing to plot.
    """
    # len() instead of truthiness so a pandas Series is accepted as well.
    if len(vals) == 0:
        return []
    s = pd.Series(vals)
    top = int(s.index.max())
    missing = [i for i in range(1, top + 1) if i not in s.index]
    # Union rather than a plain range(1, top + 1): keys outside that range
    # (0 or negatives) must stay in the plot.
    full_index = sorted(set(s.index).union(missing))
    s = s.reindex(full_index, fill_value=0)
    s.plot(kind='bar')
    plt.show()
    return missing
def main():
    """Run the whole pipeline: scrape, tally, plot.

    Fetches the comments of the default submission with get_comments(),
    counts the leading integers with get_vals(), and plots the tally with
    plot_hist().

    Returns the list of missing numbers reported by plot_hist().
    """
    comments = get_comments()
    vals = get_vals(comments)
    return plot_hist(vals)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment