nwithan8 · October 21, 2024 18:15 · nwithan8 · Oct 21, 2024
diff --git a/redditcfb_game_thread_sentiment_analysis.py b/redditcfb_game_thread_sentiment_analysis.py
 import enum
 from datetime import datetime

 import dotenv
 import praw
 from textblob import TextBlob
 from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

 # Credentials are read from a dotenv file rather than being hard-coded here
 config = dotenv.dotenv_values(".redditcfb_sentiment_analysis_env")

 # Reddit API client shared by the functions below; a missing key raises KeyError
 reddit = praw.Reddit(
    client_id=config["PRAW_CLIENT_ID"],
    client_secret=config["PRAW_CLIENT_SECRET"],
    user_agent=config["PRAW_USER_AGENT"],
    username=config["PRAW_USERNAME"],
    password=config["PRAW_PASSWORD"],
 )


 @enum.unique
 class Analyzer(enum.Enum):
    """Sentiment analysis backends that can be selected for scoring comments."""

    TEXTBLOB = "textblob"
    VADER = "vader"


 # Boundary between the neutral band and the positive/negative bands
 NEUTRAL_SCORE_THRESHOLD = 0.2

 # Human-readable labels, listed from most negative to most positive
 MOST_NEGATIVE = "Extremely negative"
 MORE_NEGATIVE = "Very negative"
 NEGATIVE = "Negative"
 NEUTRAL = "Neutral"
 POSITIVE = "Positive"
 MORE_POSITIVE = "Very positive"
 MOST_POSITIVE = "Extremely positive"

 # Label -> (lower bound, upper bound) score interval; get_heat_range treats the
 # lower bound as inclusive and the upper bound as exclusive
 HEAT_RANGES = {
    MOST_NEGATIVE: (-1, -0.8),
    MORE_NEGATIVE: (-0.8, -0.6),
    NEGATIVE: (-0.6, -NEUTRAL_SCORE_THRESHOLD),
    NEUTRAL: (-NEUTRAL_SCORE_THRESHOLD, NEUTRAL_SCORE_THRESHOLD),
    POSITIVE: (NEUTRAL_SCORE_THRESHOLD, 0.6),
    MORE_POSITIVE: (0.6, 0.8),
    MOST_POSITIVE: (0.8, 1),
 }

 # Template for the printed summary; the {placeholders} are filled in with
 # str.format by get_snapshot_summary_of_thread
 SUMMARY_MESSAGE_TEMPLATE = """
 Current Vibes, according to math:

 General feeling: {general_feeling} ({average_score})
 Positive comments: {positive_count}
 Negative comments: {negative_count}
 Most positive comment: {positive_feeling} ({max_score})
 Most negative comment: {negative_feeling} ({min_score})
 Standard deviation: {standard_deviation}
 """


 def calculate_standard_deviation(values: list[float]) -> float:
    """
    Compute the population standard deviation of a list of numbers.

    The squared deviations from the mean are averaged over the number of values
    before the square root is taken. An empty list raises ZeroDivisionError.
    :param values: The numbers to measure
    :return: The population standard deviation of the numbers
    """
    count = len(values)
    average = sum(values) / count
    squared_deviations = [(value - average) ** 2 for value in values]
    return (sum(squared_deviations) / count) ** 0.5


 def get_heat_range(score: float) -> str:
    """
    Translate a sentiment score into its heat-range label.
    :param score: The sentiment score
    :return: The label from HEAT_RANGES whose interval contains the score, or 'unknown' if none does
    """
    matching_labels = (label for label, (low, high) in HEAT_RANGES.items() if low <= score < high)
    label = next(matching_labels, None)
    if label is not None:
        return label
    # Upper bounds are exclusive, so a score of exactly 1 matches no interval
    return MOST_POSITIVE if score == 1 else 'unknown'


 class SentimentAnalysisStats:
    """
    Aggregate statistics computed once from a list of sentiment scores.
    """
    raw_scores: list[float]  # the scores exactly as passed in
    total_score: float  # sum of all scores
    average_score: float  # arithmetic mean of the scores
    min_score: float  # lowest score
    max_score: float  # highest score
    positive_count: int  # scores strictly above the threshold
    negative_count: int  # scores strictly below the negated threshold
    neutral_count: int  # scores within the threshold, bounds included
    standard_deviation: float  # result of calculate_standard_deviation

    def __init__(self, raw_scores: list[float], neutral_score_threshold: float = 0.2):
        """
        Compute every statistic from the given scores.
        :param raw_scores: The sentiment scores; an empty list raises ZeroDivisionError
        :param neutral_score_threshold: Scores within +/- this value are counted as neutral
        """
        upper = neutral_score_threshold
        lower = -neutral_score_threshold

        self.raw_scores = raw_scores
        self.total_score = sum(raw_scores)
        self.average_score = self.total_score / len(raw_scores)
        self.min_score = min(raw_scores)
        self.max_score = max(raw_scores)
        self.positive_count = sum(1 for score in raw_scores if score > upper)
        self.negative_count = sum(1 for score in raw_scores if score < lower)
        self.neutral_count = sum(1 for score in raw_scores if lower <= score <= upper)
        self.standard_deviation = calculate_standard_deviation(values=raw_scores)


 def get_sentiment_textblob(text: str) -> float:
    """
    Score a text with TextBlob.
    :param text: The text to analyze
    :return: The polarity reported by TextBlob for the text
    """
    return TextBlob(text).sentiment.polarity


 def get_sentiment_vader(text: str, analyzer: SentimentIntensityAnalyzer) -> float:
    """
    Score a text with a VADER analyzer.
    :param text: The text to analyze
    :param analyzer: The VADER analyzer instance to score with
    :return: The 'compound' value of the analyzer's polarity scores for the text
    """
    polarity_scores = analyzer.polarity_scores(text)
    return polarity_scores['compound']


 def get_sentiment_stats_from_comments(comments: list[praw.reddit.models.Comment],
                                       analyzer: Analyzer,
                                       neutral_score_threshold: float = 0.2) -> SentimentAnalysisStats:
    """
    Score the body of every comment and summarize the scores.
    :param comments: The comments to score
    :param analyzer: Which sentiment analyzer to score with
    :param neutral_score_threshold: Scores within +/- this value are counted as neutral
    :return: Statistics of the sentiment of the comments
    :raises ValueError: If the analyzer is not a supported Analyzer member
    """
    if analyzer == Analyzer.VADER:
        # One shared VADER instance for the whole batch instead of one per comment
        vader_sentiment_analyzer = SentimentIntensityAnalyzer()

        def score_body(body):
            return get_sentiment_vader(text=body, analyzer=vader_sentiment_analyzer)
    elif analyzer == Analyzer.TEXTBLOB:
        def score_body(body):
            return get_sentiment_textblob(text=body)
    else:
        raise ValueError("Invalid sentiment analyzer")

    scores = [score_body(comment.body) for comment in comments]
    return SentimentAnalysisStats(raw_scores=scores, neutral_score_threshold=neutral_score_threshold)


 def comment_is_recent(comment, time_limit_timestamp) -> bool:
    """
    Tell whether a comment was created at or after a cutoff timestamp.
    :param comment: The comment; its created_utc attribute is compared
    :param time_limit_timestamp: The cutoff timestamp
    :return: True if the comment is at least as new as the cutoff, False otherwise
    """
    cutoff = time_limit_timestamp
    return cutoff <= comment.created_utc


 def collect_comments(thread_id: str, comment_limit: int = 100, time_limit_minutes: int = 10) \
        -> list[praw.reddit.models.Comment]:
    """
    Collect the most recent comments from a Reddit thread.
    :param thread_id: The ID of the thread
    :param comment_limit: The maximum number of comments to return
    :param time_limit_minutes: Only comments created within this many minutes are returned
    :return: Up to comment_limit comments, newest first; empty if the thread has no recent comments
    """
    from datetime import timezone  # local import: the module itself only imports `datetime`

    # Take the reference time BEFORE fetching so the fetch duration cannot shrink the window.
    # An aware datetime is required: calling timestamp() on the naive value from utcnow()
    # interprets it as local time and shifts the cutoff by the machine's UTC offset.
    current_time = datetime.now(timezone.utc).timestamp()
    time_limit_timestamp = current_time - time_limit_minutes * 60

    thread = reddit.submission(id=thread_id)
    # Ask Reddit for newest-first ordering; this has to be set before the comments are accessed
    thread.comment_sort = "new"
    thread.comments.replace_more(limit=0)  # Remove the "MoreComments" objects

    # Check every comment: the flattened list mixes top-level comments and replies, so the
    # position of a comment in it says nothing reliable about its age
    recent_comments = [comment for comment in thread.comments.list() if
                       comment_is_recent(comment=comment, time_limit_timestamp=time_limit_timestamp)]
    recent_comments.sort(key=lambda comment: comment.created_utc, reverse=True)
    return recent_comments[:comment_limit]


 def get_snapshot_summary_of_thread(thread_id: str,
                                    analyzer: Analyzer,
                                    comment_limit: int = 100,
                                    time_limit_minutes: int = 10) -> str:
    """
    Build a sentiment snapshot summary of a Reddit thread.
    :param thread_id: The ID of the thread
    :param analyzer: The sentiment analyzer to use
    :param comment_limit: The maximum number of comments to fetch
    :param time_limit_minutes: The time limit in minutes to fetch comments
    :return: The formatted summary, or a short notice when no comments were collected
    """
    comments = collect_comments(thread_id=thread_id, comment_limit=comment_limit,
                                time_limit_minutes=time_limit_minutes)
    if not comments:
        return "No comments found in the thread."

    # Use the module-level threshold so the counts share the boundary used by HEAT_RANGES
    stats = get_sentiment_stats_from_comments(comments=comments, analyzer=analyzer,
                                              neutral_score_threshold=NEUTRAL_SCORE_THRESHOLD)

    # Every numeric value is rendered with three decimals so the summary lines are consistent
    return SUMMARY_MESSAGE_TEMPLATE.format(
        general_feeling=get_heat_range(stats.average_score),
        average_score=f"{stats.average_score:.3f}",
        positive_count=stats.positive_count,
        negative_count=stats.negative_count,
        positive_feeling=get_heat_range(stats.max_score),
        max_score=f"{stats.max_score:.3f}",
        negative_feeling=get_heat_range(stats.min_score),
        min_score=f"{stats.min_score:.3f}",
        standard_deviation=f"{stats.standard_deviation:.3f}",
    )


 if __name__ == '__main__':
    # Georgia vs. Texas 2024 4th Quarter Game Thread
    game_thread_id = '1g7p1ln'
    # VADER seems to be more extreme (accurate) than TextBlob, but makes for more fun results
    analyzer_type = Analyzer.VADER
    # In real-time, the time limit would be 10 minutes, but for testing purposes it is set to
    # a large number to avoid getting no comments
    summary = get_snapshot_summary_of_thread(
        thread_id=game_thread_id,
        analyzer=analyzer_type,
        comment_limit=100,
        time_limit_minutes=100000000,
    )
    print(summary)
	import enum
	from datetime import datetime

	import dotenv
	import praw
	from textblob import TextBlob
	from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

	# Credentials are read from a dotenv file rather than being hard-coded here
	config = dotenv.dotenv_values(".redditcfb_sentiment_analysis_env")

	# Reddit API client shared by the functions below; a missing key raises KeyError
	reddit = praw.Reddit(
	    client_id=config["PRAW_CLIENT_ID"],
	    client_secret=config["PRAW_CLIENT_SECRET"],
	    user_agent=config["PRAW_USER_AGENT"],
	    username=config["PRAW_USERNAME"],
	    password=config["PRAW_PASSWORD"],
	)


	class Analyzer(enum.Enum):
	    """Sentiment analysis backends that can be selected for scoring comments."""

	    # Members are indented again so they belong to the class body
	    TEXTBLOB = "textblob"
	    VADER = "vader"


	# Boundary between the neutral band and the positive/negative bands
	NEUTRAL_SCORE_THRESHOLD = 0.2

	# Human-readable labels, listed from most negative to most positive
	MOST_NEGATIVE = "Extremely negative"
	MORE_NEGATIVE = "Very negative"
	NEGATIVE = "Negative"
	NEUTRAL = "Neutral"
	POSITIVE = "Positive"
	MORE_POSITIVE = "Very positive"
	MOST_POSITIVE = "Extremely positive"

	# Label -> (lower bound, upper bound) score interval; get_heat_range treats the
	# lower bound as inclusive and the upper bound as exclusive
	HEAT_RANGES = {
	    MOST_NEGATIVE: (-1, -0.8),
	    MORE_NEGATIVE: (-0.8, -0.6),
	    NEGATIVE: (-0.6, -NEUTRAL_SCORE_THRESHOLD),
	    NEUTRAL: (-NEUTRAL_SCORE_THRESHOLD, NEUTRAL_SCORE_THRESHOLD),
	    POSITIVE: (NEUTRAL_SCORE_THRESHOLD, 0.6),
	    MORE_POSITIVE: (0.6, 0.8),
	    MOST_POSITIVE: (0.8, 1),
	}

	# Template for the printed summary; the {placeholders} are filled in with
	# str.format by get_snapshot_summary_of_thread
	SUMMARY_MESSAGE_TEMPLATE = """
	Current Vibes, according to math:

	General feeling: {general_feeling} ({average_score})
	Positive comments: {positive_count}
	Negative comments: {negative_count}
	Most positive comment: {positive_feeling} ({max_score})
	Most negative comment: {negative_feeling} ({min_score})
	Standard deviation: {standard_deviation}
	"""


	def calculate_standard_deviation(values: list[float]) -> float:
	    """
	    Compute the population standard deviation of a list of numbers.

	    The squared deviations from the mean are averaged over the number of values
	    before the square root is taken. An empty list raises ZeroDivisionError.
	    :param values: The numbers to measure
	    :return: The population standard deviation of the numbers
	    """
	    # Body indentation restored so the function parses
	    n = len(values)
	    mean = sum(values) / n
	    variance = sum((x - mean) ** 2 for x in values) / n
	    return variance ** 0.5


	def get_heat_range(score: float) -> str:
	    """
	    Translate a sentiment score into its heat-range label.
	    :param score: The sentiment score
	    :return: The label from HEAT_RANGES whose interval contains the score, or 'unknown' if none does
	    """
	    # Nesting restored: the interval test belongs inside the loop
	    for heat_range, (lower_bound, upper_bound) in HEAT_RANGES.items():
	        if lower_bound <= score < upper_bound:
	            return heat_range
	    if score == 1:  # Upper bounds are exclusive, so a score of exactly 1 matches no interval
	        return MOST_POSITIVE
	    return 'unknown'


	class SentimentAnalysisStats:
	    """
	    Aggregate statistics computed once from a list of sentiment scores.
	    """
	    raw_scores: list[float]  # the scores exactly as passed in
	    total_score: float  # sum of all scores
	    average_score: float  # arithmetic mean of the scores
	    min_score: float  # lowest score
	    max_score: float  # highest score
	    positive_count: int  # scores strictly above the threshold
	    negative_count: int  # scores strictly below the negated threshold
	    neutral_count: int  # scores within the threshold, bounds included
	    standard_deviation: float  # result of calculate_standard_deviation

	    def __init__(self, raw_scores: list[float], neutral_score_threshold: float = 0.2):
	        """
	        Compute every statistic from the given scores.
	        :param raw_scores: The sentiment scores; an empty list raises ZeroDivisionError
	        :param neutral_score_threshold: Scores within +/- this value are counted as neutral
	        """
	        # Class and method nesting restored so the definition parses
	        self.raw_scores = raw_scores
	        self.total_score = sum(raw_scores)
	        self.average_score = sum(raw_scores) / len(raw_scores)
	        self.min_score = min(raw_scores)
	        self.max_score = max(raw_scores)
	        self.positive_count = len([score for score in raw_scores if score > neutral_score_threshold])
	        self.negative_count = len([score for score in raw_scores if score < -neutral_score_threshold])
	        self.neutral_count = len(
	            [score for score in raw_scores if -neutral_score_threshold <= score <= neutral_score_threshold])
	        self.standard_deviation = calculate_standard_deviation(values=raw_scores)


	def get_sentiment_textblob(text: str) -> float:
	    """
	    Score a text with TextBlob.
	    :param text: The text to analyze
	    :return: The polarity reported by TextBlob for the text
	    """
	    # Body indentation restored so the function parses
	    blob = TextBlob(text)
	    return blob.sentiment.polarity


	def get_sentiment_vader(text: str, analyzer: SentimentIntensityAnalyzer) -> float:
	    """
	    Score a text with a VADER analyzer.
	    :param text: The text to analyze
	    :param analyzer: The VADER analyzer instance to score with
	    :return: The 'compound' value of the analyzer's polarity scores for the text
	    """
	    # Body indentation restored so the function parses
	    return analyzer.polarity_scores(text)['compound']


	def get_sentiment_stats_from_comments(comments: list[praw.reddit.models.Comment],
	                                      analyzer: Analyzer,
	                                      neutral_score_threshold: float = 0.2) -> SentimentAnalysisStats:
	    """
	    Score the body of every comment and summarize the scores.
	    :param comments: The comments to score
	    :param analyzer: Which sentiment analyzer to score with
	    :param neutral_score_threshold: Scores within +/- this value are counted as neutral
	    :return: Statistics of the sentiment of the comments
	    :raises ValueError: If the analyzer is not a supported Analyzer member
	    """
	    # Branch nesting restored so the if/elif/else chain parses
	    if analyzer == Analyzer.TEXTBLOB:
	        sentiment_scores = [get_sentiment_textblob(text=comment.body) for comment in comments]
	    elif analyzer == Analyzer.VADER:
	        # One shared VADER instance for the whole batch instead of one per comment
	        vader_sentiment_analyzer = SentimentIntensityAnalyzer()
	        sentiment_scores = [get_sentiment_vader(text=comment.body, analyzer=vader_sentiment_analyzer)
	                            for comment in comments]
	    else:
	        raise ValueError("Invalid sentiment analyzer")
	    return SentimentAnalysisStats(raw_scores=sentiment_scores, neutral_score_threshold=neutral_score_threshold)


	def comment_is_recent(comment, time_limit_timestamp) -> bool:
	    """
	    Tell whether a comment was created at or after a cutoff timestamp.
	    :param comment: The comment; its created_utc attribute is compared
	    :param time_limit_timestamp: The cutoff timestamp
	    :return: True if the comment is at least as new as the cutoff, False otherwise
	    """
	    # Body indentation restored so the function parses
	    return comment.created_utc >= time_limit_timestamp


	def collect_comments(thread_id: str, comment_limit: int = 100, time_limit_minutes: int = 10) \
	        -> list[praw.reddit.models.Comment]:
	    """
	    Collect the most recent comments from a Reddit thread.
	    :param thread_id: The ID of the thread
	    :param comment_limit: The maximum number of comments to return
	    :param time_limit_minutes: Only comments created within this many minutes are returned
	    :return: Up to comment_limit comments, newest first; empty if the thread has no recent comments
	    """
	    from datetime import timezone  # local import: the module itself only imports `datetime`

	    # Take the reference time BEFORE fetching so the fetch duration cannot shrink the window.
	    # An aware datetime is required: calling timestamp() on the naive value from utcnow()
	    # interprets it as local time and shifts the cutoff by the machine's UTC offset.
	    current_time = datetime.now(timezone.utc).timestamp()
	    time_limit_timestamp = current_time - time_limit_minutes * 60

	    thread = reddit.submission(id=thread_id)
	    # Ask Reddit for newest-first ordering; this has to be set before the comments are accessed
	    thread.comment_sort = "new"
	    thread.comments.replace_more(limit=0)  # Remove the "MoreComments" objects

	    # Check every comment: the flattened list mixes top-level comments and replies, so the
	    # position of a comment in it says nothing reliable about its age
	    recent_comments = [comment for comment in thread.comments.list() if
	                       comment_is_recent(comment=comment, time_limit_timestamp=time_limit_timestamp)]
	    recent_comments.sort(key=lambda comment: comment.created_utc, reverse=True)
	    return recent_comments[:comment_limit]


	def get_snapshot_summary_of_thread(thread_id: str,
	                                   analyzer: Analyzer,
	                                   comment_limit: int = 100,
	                                   time_limit_minutes: int = 10) -> str:
	    """
	    Build a sentiment snapshot summary of a Reddit thread.
	    :param thread_id: The ID of the thread
	    :param analyzer: The sentiment analyzer to use
	    :param comment_limit: The maximum number of comments to fetch
	    :param time_limit_minutes: The time limit in minutes to fetch comments
	    :return: The formatted summary, or a short notice when no comments were collected
	    """
	    comments = collect_comments(thread_id=thread_id, comment_limit=comment_limit,
	                                time_limit_minutes=time_limit_minutes)
	    if not comments:
	        return "No comments found in the thread."

	    # Use the module-level threshold so the counts share the boundary used by HEAT_RANGES
	    stats = get_sentiment_stats_from_comments(comments=comments, analyzer=analyzer,
	                                              neutral_score_threshold=NEUTRAL_SCORE_THRESHOLD)

	    # Every numeric value is rendered with three decimals so the summary lines are consistent
	    return SUMMARY_MESSAGE_TEMPLATE.format(
	        general_feeling=get_heat_range(stats.average_score),
	        average_score=f"{stats.average_score:.3f}",
	        positive_count=stats.positive_count,
	        negative_count=stats.negative_count,
	        positive_feeling=get_heat_range(stats.max_score),
	        max_score=f"{stats.max_score:.3f}",
	        negative_feeling=get_heat_range(stats.min_score),
	        min_score=f"{stats.min_score:.3f}",
	        standard_deviation=f"{stats.standard_deviation:.3f}",
	    )


	if __name__ == '__main__':
	    # Guarded body indented again so it only runs when the file is executed as a script
	    game_thread_id = '1g7p1ln'  # Georgia vs. Texas 2024 4th Quarter Game Thread
	    # VADER seems to be more extreme (accurate) than TextBlob, but makes for more fun results
	    analyzer_type = Analyzer.VADER
	    # In real-time, the time limit would be 10 minutes, but for testing purposes it is set to
	    # a large number to avoid getting no comments
	    summary = get_snapshot_summary_of_thread(thread_id=game_thread_id, analyzer=analyzer_type,
	                                             comment_limit=100, time_limit_minutes=100000000)
	    print(summary)