Skip to content

Instantly share code, notes, and snippets.

@c-rhodes
Created November 25, 2014 14:26
Show Gist options
  • Save c-rhodes/95e6222d872dcfce0075 to your computer and use it in GitHub Desktop.
Save c-rhodes/95e6222d872dcfce0075 to your computer and use it in GitHub Desktop.
"""
Reddit DailyProgrammer Challenge #190 - Webscraping Sentiments.
http://bit.ly/11sBRa0
"""
import requests
from bs4 import BeautifulSoup
# URL template for a video's comment listing; ``video_id`` is filled in
# with ``str.format`` by the scraper.
comments_url = 'http://www.youtube.com/all_comments?v={video_id}'

# Keywords counted towards the "happy" tally.
happy = [
    'love', 'loved', 'like', 'liked', 'awesome',
    'amazing', 'good', 'great', 'excellent',
]

# Keywords counted towards the "sad" tally.
sad = [
    'hate', 'hated', 'dislike', 'disliked', 'awful',
    'terrible', 'bad', 'painful', 'worst',
]
def scrape_video_comments(video_id):
    """Fetch a YouTube video's comment page and return the comment texts.

    Args:
        video_id: Value substituted for ``{video_id}`` in ``comments_url``.

    Returns:
        A list of strings, one for each ``div`` with class
        ``comment-text-content`` whose text is not empty or whitespace-only.
    """
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(comments_url.format(video_id=video_id), timeout=10)

    # Name the parser explicitly so the parse result does not depend on which
    # optional parser libraries happen to be installed.
    soup = BeautifulSoup(response.content, 'html.parser')
    nodes = soup.find_all('div', {'class': 'comment-text-content'})

    # Filter on the extracted text. Comparing the Tag object itself against
    # '' / ' ' never matches, so blank comments were previously kept.
    texts = (node.text for node in nodes)
    return [text for text in texts if text.strip()]
def happy_or_sad_comment(comment):
    """Count the happy and sad keywords that occur in one comment.

    Words are matched case-insensitively and with leading/trailing
    punctuation removed, so ``"Great!"`` counts as the keyword ``great``.

    Args:
        comment: The comment text to analyse.

    Returns:
        A dict with two integer entries: ``'happy'`` (number of words found
        in the module-level ``happy`` list) and ``'sad'`` (number of words
        found in the module-level ``sad`` list).
    """
    import string

    happy_words = 0
    sad_words = 0
    # split() with no argument also separates on tabs and newlines and never
    # yields empty tokens, unlike split(' ').
    for token in comment.lower().split():
        word = token.strip(string.punctuation)
        if word in happy:
            happy_words += 1
        if word in sad:
            sad_words += 1
    return dict(happy=happy_words, sad=sad_words)
def happy_or_sad(video_id):
    """Print a sentiment summary for the comments of a YouTube video.

    Scrapes the comments with ``scrape_video_comments``, sums the per-comment
    keyword counts from ``happy_or_sad_comment`` and prints the totals
    together with an overall verdict.

    Args:
        video_id: Identifier of the video whose comments are analysed.

    Returns:
        None. The summary is written to standard output.
    """
    comments = scrape_video_comments(video_id)

    # Distinct local names: ``happy`` / ``sad`` are the module-level keyword
    # lists, and shadowing them here made the function hard to read.
    happy_total = 0
    sad_total = 0
    for comment in comments:
        counts = happy_or_sad_comment(comment)
        happy_total += counts['happy']
        sad_total += counts['sad']

    # A tie (including the case of no keywords at all) is not "Sad".
    if happy_total > sad_total:
        verdict = 'Happy'
    elif sad_total > happy_total:
        verdict = 'Sad'
    else:
        verdict = 'Neutral'

    # Built from explicit lines so the text does not depend on source
    # indentation and does not start with a stray NUL character.
    output = (
        'From a sample size of {no_comments}. This video is mostly {verdict},\n'
        'it contained {happy} happy keywords and {sad} sad\n'
        'keywords.\n'
    ).format(no_comments=len(comments),
             verdict=verdict,
             happy=happy_total, sad=sad_total)
    print(output)
if __name__ == '__main__':
    import sys

    # The video id is a required argument; exit with a usage message rather
    # than an IndexError traceback when it is missing.
    if len(sys.argv) < 2:
        sys.exit('Usage: {0} VIDEO_ID'.format(sys.argv[0]))
    happy_or_sad(sys.argv[1])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment