soeirosantos · October 31, 2016 21:25
diff --git a/ycombinator_word_counter.py b/ycombinator_word_counter.py
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 import feedparser, operator
 import re
 from stop_words import get_stop_words
 from time import gmtime, strftime

 # Stop-word list for the 'en' language code, loaded once at import time;
 # execute() cleans these and excludes them from the word counts.
 stop_words = get_stop_words('en')

 # Prefix prepended to the timestamped output file name in execute().
 # NOTE: nothing in this file creates the folder.
 OUTPUT_FOLDER = 'out/'

 def execute():
     '''
     Count the words used in the titles of the feed at
     https://news.ycombinator.com/bigrss and write the repeated ones
     to a timestamped file.

     Every title is split on whitespace, each word is cleaned by
     _clean_word(), and words that are empty after cleaning or that match
     a cleaned entry of the module-level stop_words list are skipped.
     Words seen more than once are written, most frequent first, one
     "<word> <count>" per line, to OUTPUT_FOLDER + a GMT timestamp
     formatted as YYYYmmdd_HHMMSS.

     Returns None. No exception is caught here: errors raised while
     fetching the feed or opening/writing the file propagate to the caller.
     '''
     # Function-scope import so the block does not depend on a new
     # file-level import.
     import io

     feed = feedparser.parse('https://news.ycombinator.com/bigrss')
     titles = [item['title'] for item in feed['items']]
     counts = _count_words(titles, stop_words)

     path = OUTPUT_FOLDER + strftime("%Y%m%d_%H%M%S", gmtime())
     # Writing through a UTF-8 text file keeps non-ASCII words instead of
     # dropping them on UnicodeEncodeError; the with-block closes the file
     # even if a write fails.
     with io.open(path, 'w', encoding='utf-8') as output:
         # most_common() yields (word, count) pairs in descending count
         # order, so the first count of 1 ends the interesting part.
         for word, count in counts.most_common():
             if count <= 1:
                 break
             output.write(u"{0} {1}\n".format(word, count))


 # Punctuation characters stripped from every word before counting.
 _PUNCTUATION_RE = re.compile(r'[-!$%^&*()_+|~=`{}\[\]:";\'<>?,./#]')


 def _clean_word(w):
     '''
     Return w lower-cased, with surrounding whitespace removed and every
     character matched by _PUNCTUATION_RE deleted.
     '''
     return _PUNCTUATION_RE.sub('', w.lower().strip())


 def _count_words(titles, ignored):
     '''
     Return a Counter mapping each cleaned word found in titles to its
     number of occurrences.

     titles  -- iterable of strings; each is split on whitespace.
     ignored -- iterable of words to leave out; they are cleaned with
                _clean_word() first so they compare equal to cleaned words.

     Words that are empty after cleaning (punctuation-only tokens such as
     "-") are not counted.
     '''
     from collections import Counter

     # A set gives constant-time membership tests for every word checked.
     ignored_words = set(_clean_word(word) for word in ignored)
     counts = Counter()
     # Explicit loops rather than map(): map() is lazy on Python 3 and
     # would never run code that is called only for its side effects.
     for title in titles:
         for raw_word in title.split():
             word = _clean_word(raw_word)
             if word and word not in ignored_words:
                 counts[word] += 1
     return counts
  
 # Run the word count only when this file is executed as a script,
 # not when it is imported as a module.
 if __name__ == '__main__':
  execute()
	#!/usr/bin/env python
	# -*- coding: utf-8 -*-
	import feedparser, operator
	import re
	from stop_words import get_stop_words
	from time import gmtime, strftime

	# Stop-word list for the 'en' language code, loaded once at import time;
	# execute() cleans these and excludes them from the word counts.
	stop_words = get_stop_words('en')

	# Prefix prepended to the timestamped output file name in execute().
	# NOTE: nothing in this file creates the folder.
	OUTPUT_FOLDER = 'out/'

	def execute():
	    '''
	    Count the words used in the titles of the feed at
	    https://news.ycombinator.com/bigrss and write the repeated ones
	    to a timestamped file.

	    Every title is split on whitespace, each word is cleaned by
	    _clean_word(), and words that are empty after cleaning or that match
	    a cleaned entry of the module-level stop_words list are skipped.
	    Words seen more than once are written, most frequent first, one
	    "<word> <count>" per line, to OUTPUT_FOLDER + a GMT timestamp
	    formatted as YYYYmmdd_HHMMSS.

	    Returns None. No exception is caught here: errors raised while
	    fetching the feed or opening/writing the file propagate to the caller.
	    '''
	    # Function-scope import so the block does not depend on a new
	    # file-level import.
	    import io

	    feed = feedparser.parse('https://news.ycombinator.com/bigrss')
	    titles = [item['title'] for item in feed['items']]
	    counts = _count_words(titles, stop_words)

	    path = OUTPUT_FOLDER + strftime("%Y%m%d_%H%M%S", gmtime())
	    # Writing through a UTF-8 text file keeps non-ASCII words instead of
	    # dropping them on UnicodeEncodeError; the with-block closes the file
	    # even if a write fails.
	    with io.open(path, 'w', encoding='utf-8') as output:
	        # most_common() yields (word, count) pairs in descending count
	        # order, so the first count of 1 ends the interesting part.
	        for word, count in counts.most_common():
	            if count <= 1:
	                break
	            output.write(u"{0} {1}\n".format(word, count))


	# Punctuation characters stripped from every word before counting.
	_PUNCTUATION_RE = re.compile(r'[-!$%^&*()_+|~=`{}\[\]:";\'<>?,./#]')


	def _clean_word(w):
	    '''
	    Return w lower-cased, with surrounding whitespace removed and every
	    character matched by _PUNCTUATION_RE deleted.
	    '''
	    return _PUNCTUATION_RE.sub('', w.lower().strip())


	def _count_words(titles, ignored):
	    '''
	    Return a Counter mapping each cleaned word found in titles to its
	    number of occurrences.

	    titles  -- iterable of strings; each is split on whitespace.
	    ignored -- iterable of words to leave out; they are cleaned with
	               _clean_word() first so they compare equal to cleaned words.

	    Words that are empty after cleaning (punctuation-only tokens such as
	    "-") are not counted.
	    '''
	    from collections import Counter

	    # A set gives constant-time membership tests for every word checked.
	    ignored_words = set(_clean_word(word) for word in ignored)
	    counts = Counter()
	    # Explicit loops rather than map(): map() is lazy on Python 3 and
	    # would never run code that is called only for its side effects.
	    for title in titles:
	        for raw_word in title.split():
	            word = _clean_word(raw_word)
	            if word and word not in ignored_words:
	                counts[word] += 1
	    return counts

	# Run the word count only when this file is executed as a script,
	# not when it is imported as a module.
	if __name__ == '__main__':
	    execute()