karrtikr · April 4, 2023 07:11
diff --git a/info-needed-comments.py b/info-needed-comments.py
 import subprocess
 import json
 from collections import Counter
 from nltk.corpus import stopwords
 from nltk import pos_tag
 from spellchecker import SpellChecker

 # Triagers whose comments are included in the analysis.
 TRIAGERS = [
    "karrtikr",
    "karthiknadig",
    "paulacamargo25",
    "eleanorjboyd",
    "brettcannon",
    "ericsnowcurrently",
    "DonJayamanne",
 ]

 # `gh issue list` only returns 30 issues unless --limit is passed, which
 # would silently restrict the analysis to a small sample of the issues.
 ISSUE_LIMIT = 1000

 # Ask the GitHub CLI for the numbers of the closed issues labelled info-needed.
 result = subprocess.run(
    [
        "gh", "issue", "list",
        "--state=closed",
        "--label=info-needed",
        "--limit", str(ISSUE_LIMIT),
        "-R", "microsoft/vscode-python",
        "--json", "number",
    ],
    capture_output=True,
 )
 if result.returncode != 0:
    # SystemExit with a string argument prints it to stderr and exits with
    # status 1; unlike the `exit` helper it does not rely on the site module.
    raise SystemExit(result.stderr.decode())

 # Parsed JSON output; iterated below as objects that carry a "number" key.
 issues = json.loads(result.stdout.decode())

 # For every issue, fetch its comments and keep the second-to-last comment
 # written by one of the TRIAGERS. Issues with fewer than two triager
 # comments contribute nothing.
 all_comments = []
 for issue in issues:
    result = subprocess.run(
        [
            "gh", "issue", "view", str(issue["number"]),
            "--comments",
            "-R", "microsoft/vscode-python",
            "--json", "comments",
        ],
        capture_output=True,
    )
    if result.returncode != 0:
        # SystemExit with a string argument prints it to stderr and exits
        # with status 1, without relying on the site-provided `exit` helper.
        raise SystemExit(result.stderr.decode())

    data = json.loads(result.stdout.decode())
    if "comments" not in data:
        print(f"Error: comments not found in JSON output for issue {issue['number']}")
        continue

    # Only comments authored by a known triager are of interest.
    triager_comments = [
        entry
        for entry in data["comments"]
        if entry["author"]["login"] in TRIAGERS
    ]
    if len(triager_comments) < 2:
        continue

    comment = triager_comments[-2]
    print(comment["body"])
    print('--------------------')
    all_comments.append(comment["body"])

 # Count how often each remaining word occurs in the collected comments after
 # dropping stop words, non-alphabetic tokens, words unknown to the spell
 # checker, nouns and pronouns.
 stop_words = set(stopwords.words('english'))
 spell = SpellChecker(language='en', distance=1)
 # Part-of-speech tags that mark the nouns and pronouns left out of the count.
 EXCLUDED_TAGS = frozenset({'NN', 'NNS', 'NNP', 'NNPS', 'PRP', 'PRP$'})
 all_words = []
 for comment in all_comments:
    words = comment.split()
    if len(words) < 2:
        continue
    # Tag the tokens exactly as written and only lowercase afterwards: the
    # stop-word list is lowercase, so comparing the raw token would let
    # capitalised stop words ("The", "If") through, and counting the raw
    # token would split "Please" and "please" into separate entries.
    for word, pos in pos_tag(words):
        if pos in EXCLUDED_TAGS or not word.isalpha():
            continue
        lowered = word.lower()
        if lowered in stop_words or spell.unknown([word]):
            continue
        all_words.append(lowered)

 word_counts = Counter(all_words)

 # Print the top 10 most frequent words
 print("Top 10 most frequent words in the second last comment across all issues (excluding stop words, nouns, and pronouns):")
 for word, count in word_counts.most_common(10):
    print(f"{word}: {count} occurrences")
	import subprocess
	import json
	from collections import Counter
	from nltk.corpus import stopwords
	from nltk import pos_tag
	from spellchecker import SpellChecker

	# Triagers whose comments are included in the analysis.
	TRIAGERS = [
	    "karrtikr",
	    "karthiknadig",
	    "paulacamargo25",
	    "eleanorjboyd",
	    "brettcannon",
	    "ericsnowcurrently",
	    "DonJayamanne",
	]

	# `gh issue list` only returns 30 issues unless --limit is passed, which
	# would silently restrict the analysis to a small sample of the issues.
	ISSUE_LIMIT = 1000

	# Ask the GitHub CLI for the numbers of the closed issues labelled info-needed.
	result = subprocess.run(
	    [
	        "gh", "issue", "list",
	        "--state=closed",
	        "--label=info-needed",
	        "--limit", str(ISSUE_LIMIT),
	        "-R", "microsoft/vscode-python",
	        "--json", "number",
	    ],
	    capture_output=True,
	)
	if result.returncode != 0:
	    # SystemExit with a string argument prints it to stderr and exits with
	    # status 1; unlike the `exit` helper it does not rely on the site module.
	    raise SystemExit(result.stderr.decode())

	# Parsed JSON output; iterated below as objects that carry a "number" key.
	issues = json.loads(result.stdout.decode())

	# For every issue, fetch its comments and keep the second-to-last comment
	# written by one of the TRIAGERS. Issues with fewer than two triager
	# comments contribute nothing.
	all_comments = []
	for issue in issues:
	    result = subprocess.run(
	        [
	            "gh", "issue", "view", str(issue["number"]),
	            "--comments",
	            "-R", "microsoft/vscode-python",
	            "--json", "comments",
	        ],
	        capture_output=True,
	    )
	    if result.returncode != 0:
	        # SystemExit with a string argument prints it to stderr and exits
	        # with status 1, without relying on the site-provided `exit` helper.
	        raise SystemExit(result.stderr.decode())

	    data = json.loads(result.stdout.decode())
	    if "comments" not in data:
	        print(f"Error: comments not found in JSON output for issue {issue['number']}")
	        continue

	    # Only comments authored by a known triager are of interest.
	    triager_comments = [
	        entry
	        for entry in data["comments"]
	        if entry["author"]["login"] in TRIAGERS
	    ]
	    if len(triager_comments) < 2:
	        continue

	    comment = triager_comments[-2]
	    print(comment["body"])
	    print('--------------------')
	    all_comments.append(comment["body"])

	# Count how often each remaining word occurs in the collected comments after
	# dropping stop words, non-alphabetic tokens, words unknown to the spell
	# checker, nouns and pronouns.
	stop_words = set(stopwords.words('english'))
	spell = SpellChecker(language='en', distance=1)
	# Part-of-speech tags that mark the nouns and pronouns left out of the count.
	EXCLUDED_TAGS = frozenset({'NN', 'NNS', 'NNP', 'NNPS', 'PRP', 'PRP$'})
	all_words = []
	for comment in all_comments:
	    words = comment.split()
	    if len(words) < 2:
	        continue
	    # Tag the tokens exactly as written and only lowercase afterwards: the
	    # stop-word list is lowercase, so comparing the raw token would let
	    # capitalised stop words ("The", "If") through, and counting the raw
	    # token would split "Please" and "please" into separate entries.
	    for word, pos in pos_tag(words):
	        if pos in EXCLUDED_TAGS or not word.isalpha():
	            continue
	        lowered = word.lower()
	        if lowered in stop_words or spell.unknown([word]):
	            continue
	        all_words.append(lowered)

	word_counts = Counter(all_words)

	# Print the top 10 most frequent words
	print("Top 10 most frequent words in the second last comment across all issues (excluding stop words, nouns, and pronouns):")
	for word, count in word_counts.most_common(10):
	    print(f"{word}: {count} occurrences")