Skip to content

Instantly share code, notes, and snippets.

@nwithan8
Created October 21, 2024 18:15
Show Gist options
  • Save nwithan8/b355460495198b3c7a55a86e0eeed4ef to your computer and use it in GitHub Desktop.
Save nwithan8/b355460495198b3c7a55a86e0eeed4ef to your computer and use it in GitHub Desktop.
Analyze live emotions of RedditCFB game thread
import enum
from datetime import datetime, timezone

import dotenv
import praw
from textblob import TextBlob
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
# Read the Reddit API credentials from the script's dotenv file.
config = dotenv.dotenv_values(".redditcfb_sentiment_analysis_env")

# Module-level PRAW client used to fetch submissions. Every key below must be
# present in the dotenv file; a missing one raises KeyError at import time.
reddit = praw.Reddit(
    client_id=config["PRAW_CLIENT_ID"],
    client_secret=config["PRAW_CLIENT_SECRET"],
    user_agent=config["PRAW_USER_AGENT"],
    username=config["PRAW_USERNAME"],
    password=config["PRAW_PASSWORD"],
)
class Analyzer(enum.Enum):
    """Sentiment-analysis backends that can be used to score a comment."""

    # Score text with TextBlob.
    TEXTBLOB = "textblob"
    # Score text with VADER.
    VADER = "vader"
# Half-width of the neutral band centred on a score of 0.
NEUTRAL_SCORE_THRESHOLD = 0.2

# Human-readable sentiment labels, from coldest to warmest.
MOST_NEGATIVE = "Extremely negative"
MORE_NEGATIVE = "Very negative"
NEGATIVE = "Negative"
NEUTRAL = "Neutral"
POSITIVE = "Positive"
MORE_POSITIVE = "Very positive"
MOST_POSITIVE = "Extremely positive"

# The labels in ascending order, and the boundaries that separate them on the
# score scale. There is one more boundary than there are labels: label i spans
# boundary i to boundary i + 1.
_HEAT_LABELS = (
    MOST_NEGATIVE,
    MORE_NEGATIVE,
    NEGATIVE,
    NEUTRAL,
    POSITIVE,
    MORE_POSITIVE,
    MOST_POSITIVE,
)
_HEAT_BOUNDARIES = (
    -1,
    -0.8,
    -0.6,
    -NEUTRAL_SCORE_THRESHOLD,
    NEUTRAL_SCORE_THRESHOLD,
    0.6,
    0.8,
    1,
)

# Maps each label to its (lower bound, upper bound) slice of the score scale.
HEAT_RANGES = dict(zip(_HEAT_LABELS, zip(_HEAT_BOUNDARIES, _HEAT_BOUNDARIES[1:])))
# Plain-text report layout. Filled in with str.format() using the named
# placeholders: general_feeling, average_score, positive_count, negative_count,
# positive_feeling, max_score, negative_feeling, min_score, standard_deviation.
SUMMARY_MESSAGE_TEMPLATE = """
Current Vibes, according to math:
General feeling: {general_feeling} ({average_score})
Positive comments: {positive_count}
Negative comments: {negative_count}
Most positive comment: {positive_feeling} ({max_score})
Most negative comment: {negative_feeling} ({min_score})
Standard deviation: {standard_deviation}
"""
def calculate_standard_deviation(values: list[float]) -> float:
    """
    Calculate the population standard deviation of a list of values.

    The variance is divided by ``n`` (not ``n - 1``), i.e. the values are
    treated as the whole population rather than as a sample of it.

    :param values: The list of values
    :return: The standard deviation of the values, or 0.0 if the list is empty
    """
    if not values:
        # Nothing to measure; also avoids dividing by zero below.
        return 0.0
    n = len(values)
    mean = sum(values) / n
    variance = sum((x - mean) ** 2 for x in values) / n
    return variance ** 0.5
def get_heat_range(score: float) -> str:
    """
    Translate a sentiment score into its heat-range label.

    Each range in HEAT_RANGES includes its lower bound and excludes its upper
    bound, so a score sitting on a shared boundary belongs to the warmer range.
    :param score: The sentiment score
    :return: The label of the matching range, or 'unknown' if no range matches
    """
    matching_labels = (
        label
        for label, (low, high) in HEAT_RANGES.items()
        if low <= score < high
    )
    label = next(matching_labels, None)
    if label is not None:
        return label
    # The top range excludes its upper bound, so a perfect score of 1 is
    # mapped to it explicitly.
    return MOST_POSITIVE if score == 1 else 'unknown'
class SentimentAnalysisStats:
    """
    Summary statistics over a list of sentiment scores.

    Everything is computed once, in the constructor, and stored as plain attributes.
    """
    raw_scores: list[float]
    total_score: float
    average_score: float
    min_score: float
    max_score: float
    positive_count: int
    negative_count: int
    neutral_count: int
    standard_deviation: float

    def __init__(self, raw_scores: list[float], neutral_score_threshold: float = 0.2):
        """
        :param raw_scores: The scores to summarise; an empty list raises ZeroDivisionError
        :param neutral_score_threshold: Scores within +/- this value (inclusive) count as neutral
        """
        score_count = len(raw_scores)
        total = sum(raw_scores)

        self.raw_scores = raw_scores
        self.total_score = total
        self.average_score = total / score_count
        self.min_score = min(raw_scores)
        self.max_score = max(raw_scores)

        # Strictly outside the neutral band is positive/negative; the band
        # itself, both edges included, is neutral.
        upper = neutral_score_threshold
        lower = -neutral_score_threshold
        self.positive_count = sum(1 for score in raw_scores if score > upper)
        self.negative_count = sum(1 for score in raw_scores if score < lower)
        self.neutral_count = sum(1 for score in raw_scores if lower <= score <= upper)

        self.standard_deviation = calculate_standard_deviation(values=raw_scores)
def get_sentiment_textblob(text: str) -> float:
    """
    Score a text with TextBlob.

    The result is TextBlob's polarity for the text, expected to be a float
    between -1 (negative) and 1 (positive).
    :param text: The text to analyze
    :return: The polarity of the text
    """
    return TextBlob(text).sentiment.polarity
def get_sentiment_vader(text: str, analyzer: SentimentIntensityAnalyzer) -> float:
    """
    Score a text with VADER.

    The result is the 'compound' entry of the analyzer's polarity scores,
    expected to be a float between -1 (negative) and 1 (positive).
    :param text: The text to analyze
    :param analyzer: An already-constructed VADER analyzer (passed in so it can be reused across calls)
    :return: The compound sentiment score of the text
    """
    polarity_scores = analyzer.polarity_scores(text)
    return polarity_scores['compound']
def get_sentiment_stats_from_comments(comments: list[praw.reddit.models.Comment],
                                      analyzer: Analyzer,
                                      neutral_score_threshold: float = 0.2) -> SentimentAnalysisStats:
    """
    Score the body of every comment and summarise the scores.

    :param comments: The list of comments
    :param analyzer: The sentiment analyzer to use
    :param neutral_score_threshold: The threshold for considering a sentiment score as neutral
    :return: Statistics of the sentiment of the comments
    :raises ValueError: If the analyzer is not a supported one
    """
    # Choose the scoring function first, then score all comments in one pass.
    if analyzer == Analyzer.TEXTBLOB:
        score_text = get_sentiment_textblob
    elif analyzer == Analyzer.VADER:
        # One VADER instance is shared by all comments in this call.
        vader = SentimentIntensityAnalyzer()

        def score_text(text: str) -> float:
            return get_sentiment_vader(text=text, analyzer=vader)
    else:
        raise ValueError("Invalid sentiment analyzer")

    scores = [score_text(text=comment.body) for comment in comments]
    return SentimentAnalysisStats(raw_scores=scores, neutral_score_threshold=neutral_score_threshold)
def comment_is_recent(comment, time_limit_timestamp) -> bool:
    """
    Tell whether a comment was created at or after a cutoff.

    :param comment: The comment; its ``created_utc`` attribute is compared against the cutoff
    :param time_limit_timestamp: The cutoff timestamp
    :return: True if the comment was created at or after the cutoff, False otherwise
    """
    created_at = comment.created_utc
    return created_at >= time_limit_timestamp
def collect_comments(thread_id: str, comment_limit: int = 100, time_limit_minutes: int = 10) \
        -> list[praw.reddit.models.Comment]:
    """
    Collect recent comments from a Reddit thread.

    :param thread_id: The ID of the thread
    :param comment_limit: The maximum number of comments to fetch
    :param time_limit_minutes: Only comments created within this many minutes before the call are kept
    :return: A list of comments (empty if the thread has no recent comments)
    """
    # Best to set this BEFORE retrieving the comments, to lower the chances of
    # keeping comments older than the time limit. An aware datetime is required:
    # .timestamp() on the naive value returned by utcnow() is interpreted as
    # local time, which shifts the cutoff by the machine's UTC offset.
    current_time = datetime.now(timezone.utc).timestamp()
    time_limit_timestamp = current_time - time_limit_minutes * 60

    thread = reddit.submission(id=thread_id)
    # Request newest-first ordering; this has to be set before the comments are
    # first accessed.
    thread.comment_sort = "new"
    thread.comments.replace_more(limit=0)  # Remove the "MoreComments" objects
    # Fetch (up to) the `comment_limit` first comments of the flattened tree
    comments = thread.comments.list()[:comment_limit]

    # The flattened list mixes top-level comments and replies, so it is not in
    # chronological order and checking only its last element is not enough:
    # every comment is tested. This also covers a thread with no comments.
    return [comment for comment in comments if
            comment_is_recent(comment=comment, time_limit_timestamp=time_limit_timestamp)]
def get_snapshot_summary_of_thread(thread_id: str,
                                   analyzer: Analyzer,
                                   comment_limit: int = 100,
                                   time_limit_minutes: int = 10) -> str:
    """
    Get an analysis snapshot summary of a Reddit thread.

    :param thread_id: The ID of the thread
    :param analyzer: The sentiment analyzer to use
    :param comment_limit: The maximum number of comments to fetch
    :param time_limit_minutes: The time limit in minutes to fetch comments
    :return: A summary of the thread analysis, or a short notice if no comments were collected
    """
    comments: list[praw.reddit.models.Comment] = collect_comments(thread_id=thread_id, comment_limit=comment_limit,
                                                                  time_limit_minutes=time_limit_minutes)
    if not comments:
        return "No comments found in the thread."

    # Use the same threshold the heat ranges are built from, so the
    # positive/negative counts agree with the labels shown next to them.
    sentiment_statistics = get_sentiment_stats_from_comments(comments=comments, analyzer=analyzer,
                                                             neutral_score_threshold=NEUTRAL_SCORE_THRESHOLD)

    # All scores are rendered with three decimals so the report is uniform.
    summary_message = SUMMARY_MESSAGE_TEMPLATE.format(
        general_feeling=get_heat_range(sentiment_statistics.average_score),
        average_score=f"{sentiment_statistics.average_score:.3f}",
        positive_count=sentiment_statistics.positive_count,
        negative_count=sentiment_statistics.negative_count,
        positive_feeling=get_heat_range(sentiment_statistics.max_score),
        max_score=f"{sentiment_statistics.max_score:.3f}",
        negative_feeling=get_heat_range(sentiment_statistics.min_score),
        min_score=f"{sentiment_statistics.min_score:.3f}",
        standard_deviation=f"{sentiment_statistics.standard_deviation:.3f}",
    )
    return summary_message
if __name__ == '__main__':
    # Georgia vs. Texas 2024 4th Quarter Game Thread
    game_thread_id = '1g7p1ln'

    # VADER seems to be more extreme (accurate) than TextBlob, but makes for more fun results
    analyzer_type = Analyzer.VADER

    # In real-time the time limit would be 10 minutes; for testing purposes it
    # is set to a very large number to avoid getting no comments.
    summary = get_snapshot_summary_of_thread(
        thread_id=game_thread_id,
        analyzer=analyzer_type,
        comment_limit=100,
        time_limit_minutes=100_000_000,
    )
    print(summary)
@nwithan8
Copy link
Author

Output:

Current Vibes, according to math:

General feeling: Neutral (-0.116)
Positive comments: 23
Negative comments: 42
Most positive comment: Extremely positive (0.9173)
Most negative comment: Extremely negative (-0.9109)
Standard deviation: 0.44973478817965584

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment