ajbrock · November 8, 2018 17:21
diff --git a/fewest_dominion_words.py b/fewest_dominion_words.py
 import numpy as np
 # Corpus available here: https://pastebin.com/WqD6fAgu
 # Corpus taken from https://dominionstrategy.com/all-cards/

 # Load the saved card-list page into memory, one list entry per line
 # (each entry keeps its trailing newline).
 with open('dominion_cards.html', 'r') as rfile:
     x = list(rfile)
  
 # Word-counting helper used when ranking the cards below
 def count_words(text):
     """Return the number of "words" in ``text`` under this script's rules.

     Every occurrence of '+', '$' or '-' counts as one word and is then
     removed from the text. Each '/' is turned into a space, the remainder
     is split on single space characters, and every non-empty piece counts
     as one more word.
     """
     symbols = ('+', '$', '-')
     # The symbols are distinct single characters, so counting them all
     # before stripping gives the same totals as interleaving the two steps.
     symbol_total = sum(text.count(symbol) for symbol in symbols)
     for symbol in symbols:
         text = text.replace(symbol, '')
     # A slash separates two words, so treat it like a space
     pieces = text.replace('/', ' ').split(' ')
     # Consecutive spaces yield empty pieces, which are not words
     return symbol_total + sum(1 for piece in pieces if piece)

 # Extract cards and format pythonically
 def _strip_tags(line):
     """Return ``line`` with every complete ``<...>`` span removed.

     The closing '>' is searched for starting at the opening '<', so a
     stray '>' earlier in the line cannot be mistaken for the end of the
     tag. A '<' that is never closed is left in place and ends the scan.
     """
     low = line.find('<')
     while low != -1:
         high = line.find('>', low)
         if high == -1:
             # Unterminated tag: nothing further can be removed safely
             break
         line = line[:low] + line[high + 1:]
         # Nothing before ``low`` contains a '<', so resume from there
         low = line.find('<', low)
     return line

 # ``c`` holds one list per card; each list collects the cleaned-up text of
 # the lines found between that card's <tr> and </tr> markers.
 c = []
 active = False
 # Substrings deleted outright from every extracted line
 weasels = ['\n']
 for s in x:
     # The <tr> indicates the start of a card
     if '<tr>' in s:
         active = True
         c.append([])
     elif '</tr>' in s:
         active = False
     elif active:
         s = _strip_tags(s)
         for word in weasels:
             s = s.replace(word, '')
         # Plural first, otherwise 'Victory Points' would become 'VPs'
         s = s.replace('Victory Points', 'VP')
         s = s.replace('Victory Point', 'VP')
         c[-1].append(s)
   
 # Toss landmarks and other non-card rows. The tuple has to be a real
 # (parenthesised) expression: Python 3 rejects a bare comma-separated list
 # after ``in`` inside a comprehension.
 excluded_types = ('Boon', 'Landmark', 'Hex', 'State', 'Event', 'Castle',
                   'Ruins', 'Shelter')
 # item[1] is presumably the card-type field -- TODO confirm against the
 # HTML layout. Rows too short to have that field are dropped as well,
 # instead of raising IndexError.
 c = [
     item for item in c
     if len(item) > 1
     and not any(word in item[1] for word in excluded_types)
 ]
 # Everything from the fourth field onwards is joined into the text that
 # gets word-counted.
 txt = [', '.join(item[3:]) for item in c]

 # Count words
 counts = [count_words(item) for item in txt]
 # order[k] is the index (into c / counts) of the card with the k-th
 # smallest word count
 order = np.argsort(counts)
 # Print out num_display cards and their wordcounts; display more than ten so we can skip ones with mistakes or errors
 num_display = 25
 print('Displaying %d cards with lowest number of words...' % num_display)
 # Index the plain lists directly: converting ``c`` to an array on every
 # iteration is wasteful and fails outright when the rows differ in length.
 # Slicing ``order`` also keeps the loop in bounds when fewer than
 # num_display cards are available.
 for i, card_index in enumerate(order[:num_display]):
     print('#%d: %s, %s' % (i, c[card_index], counts[card_index]))
  
 # Which ones I select
 print('-------------------------------------')
 print('-------------------------------------')
 # Hand-picked positions in the ranking printed above (0-based ranks into
 # ``order``); together they give the selected cards and their total count.
 my_indices = [4, 7, 8, 9, 10, 11, 12, 13, 14, 15]
 for num, i in enumerate(my_indices, start=1):
     card_index = order[i]
     # Index the lists directly rather than rebuilding an array from ``c``
     # each time, which fails when the rows differ in length.
     print('#%d: %s, %s' % (num, c[card_index], counts[card_index]))
 print('Sum of all words is %d' % sum(counts[order[index]] for index in my_indices))
	import numpy as np
	# Corpus available here: https://pastebin.com/WqD6fAgu
	# Corpus taken from https://dominionstrategy.com/all-cards/

	# Read all cards into memory, one list entry per line of the saved page.
	# (Indentation of the ``with`` body restored; it had been flattened.)
	with open('dominion_cards.html', 'r') as rfile:
	    x = rfile.readlines()

	# Convenience function to count words, used later
	def count_words(text):
	    """Return the number of "words" in ``text`` under this script's rules.

	    Every occurrence of '+', '$' or '-' counts as one word and is then
	    removed. Each '/' becomes a space, the remainder is split on single
	    space characters, and every non-empty piece counts as one more word.
	    """
	    count = 0
	    # Count the symbol "words", stripping each one once it is counted
	    for symbol in ('+', '$', '-'):
	        count += text.count(symbol)
	        text = text.replace(symbol, '')
	    # Replace the slashes with spaces so both sides count separately
	    text = text.replace('/', ' ')
	    # Split on spaces and count the non-empty pieces
	    count += sum(1 for piece in text.split(' ') if piece)
	    return count

	# Extract cards and format pythonically
	def _strip_tags(line):
	    """Return ``line`` with every complete ``<...>`` span removed.

	    The closing '>' is searched for starting at the opening '<', so a
	    stray '>' earlier in the line cannot be mistaken for the end of the
	    tag. A '<' that is never closed is left in place and ends the scan.
	    """
	    low = line.find('<')
	    while low != -1:
	        high = line.find('>', low)
	        if high == -1:
	            # Unterminated tag: nothing further can be removed safely
	            break
	        line = line[:low] + line[high + 1:]
	        # Nothing before ``low`` contains a '<', so resume from there
	        low = line.find('<', low)
	    return line

	# ``c`` holds one list per card; each list collects the cleaned-up text
	# of the lines found between that card's <tr> and </tr> markers.
	c = []
	active = False
	# Substrings deleted outright from every extracted line
	weasels = ['\n']
	for s in x:
	    # The <tr> indicates the start of a card
	    if '<tr>' in s:
	        active = True
	        c.append([])
	    elif '</tr>' in s:
	        active = False
	    elif active:
	        s = _strip_tags(s)
	        for word in weasels:
	            s = s.replace(word, '')
	        # Plural first, otherwise 'Victory Points' would become 'VPs'
	        s = s.replace('Victory Points', 'VP')
	        s = s.replace('Victory Point', 'VP')
	        c[-1].append(s)

	# Toss landmarks and other non-card rows. The tuple has to be a real
	# (parenthesised) expression: Python 3 rejects a bare comma-separated
	# list after ``in`` inside a comprehension.
	excluded_types = ('Boon', 'Landmark', 'Hex', 'State', 'Event', 'Castle',
	                  'Ruins', 'Shelter')
	# item[1] is presumably the card-type field -- TODO confirm against the
	# HTML layout. Rows too short to have that field are dropped as well,
	# instead of raising IndexError.
	c = [
	    item for item in c
	    if len(item) > 1
	    and not any(word in item[1] for word in excluded_types)
	]
	# Everything from the fourth field onwards is joined into the text that
	# gets word-counted.
	txt = [', '.join(item[3:]) for item in c]

	# Count words
	counts = [count_words(item) for item in txt]
	# order[k] is the index (into c / counts) of the card with the k-th
	# smallest word count
	order = np.argsort(counts)
	# Print out num_display cards and their wordcounts; display more than ten so we can skip ones with mistakes or errors
	num_display = 25
	print('Displaying %d cards with lowest number of words...' % num_display)
	# Index the plain lists directly: converting ``c`` to an array on every
	# iteration is wasteful and fails outright when the rows differ in
	# length. Slicing ``order`` also keeps the loop in bounds when fewer
	# than num_display cards are available.
	for i, card_index in enumerate(order[:num_display]):
	    print('#%d: %s, %s' % (i, c[card_index], counts[card_index]))

	# Which ones I select
	print('-------------------------------------')
	print('-------------------------------------')
	# Hand-picked positions in the ranking printed above (0-based ranks into
	# ``order``); together they give the selected cards and their total count.
	my_indices = [4, 7, 8, 9, 10, 11, 12, 13, 14, 15]
	for num, i in enumerate(my_indices, start=1):
	    card_index = order[i]
	    # Index the lists directly rather than rebuilding an array from ``c``
	    # each time, which fails when the rows differ in length.
	    print('#%d: %s, %s' % (num, c[card_index], counts[card_index]))
	print('Sum of all words is %d' % sum(counts[order[index]] for index in my_indices))
No results found