Skip to content

Instantly share code, notes, and snippets.

@jeff082chen
Created March 22, 2024 13:43
Show Gist options
  • Save jeff082chen/2158c380bb6e229a54b0a5e63b5bc1f0 to your computer and use it in GitHub Desktop.
Save jeff082chen/2158c380bb6e229a54b0a5e63b5bc1f0 to your computer and use it in GitHub Desktop.
# Word- and character-frequency statistics for a piece of text.
#
# Precondition: `text` (the string that was read in) must already be defined
# before this snippet runs.
from collections import Counter

# Lower-case first so counting is case-insensitive, then split on whitespace.
words = text.lower().split()

# Strip every non-alphanumeric character (punctuation etc.) from each token.
# Tokens that become empty, i.e. pure punctuation, are not counted as words.
cleaned_words = ("".join(filter(str.isalnum, word)) for word in words)
word_counts = Counter(cleaned for cleaned in cleaned_words if cleaned)

# Occurrences of each ASCII letter and digit; every key is present even when
# its count is 0.  Other alphanumeric characters (e.g. accented letters) are
# part of the words above but are deliberately not tallied here.
character_counts = dict.fromkeys("abcdefghijklmnopqrstuvwxyz0123456789", 0)
for cleaned, occurrences in word_counts.items():
    for char in cleaned:
        if char in character_counts:
            character_counts[char] += occurrences

# Total number of words (counting repeats).
total_words = sum(word_counts.values())

# Number of distinct words.
total_word_types = len(word_counts)

# The three most frequent words with their counts; ties keep first-seen order.
most_frequent_words = word_counts.most_common(3)

# The first three words, in lexical order, that appear exactly once.
words_appear_once = sorted(
    word for word, count in word_counts.items() if count == 1
)[:3]

# Bare expression: shows the results when evaluated as the last statement of
# a notebook cell or REPL input; it has no visible effect in a plain script.
total_words, total_word_types, most_frequent_words, words_appear_once, sorted(character_counts.items())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment