Created
September 17, 2009 15:35
-
-
Save jdar/188544 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## words/word.py | |
import re
import sys

import psycopg2
from psycopg2.extras import DictCursor
from psycopg2.extensions import adapt

import scriptutil as SU
# Open the module-wide database connection and cursor used by the Word
# class below.  If the database is unreachable we cannot do anything
# useful, so report and exit.
try:
    db = psycopg2.connect(database="scrabble", user="python", password="python")
    cur = db.cursor(cursor_factory=DictCursor)
    # cur.execute("CREATE TABLE words (name varchar, probability int, frequency int, catches varchar, hangs varchar);")
except psycopg2.Error:
    # BUG FIX: was a bare "except:" hiding the real error, a Python-2
    # print statement, and "sys.ext()" (a typo that raised AttributeError;
    # sys was never imported either).
    print("I am unable to connect to the database")
    sys.exit(1)

# NOTE(review): the original also had a second try/except whose bodies were
# two bare string expressions ("trying to find a wordlist reference file") —
# pure no-ops with an unreachable handler — removed as dead code.
class Word:
    """A legal scrabble word.

    Legal words are
      1) present in the official word lists (./words/*.txt), and
      2) scored with point/frequency attributes derived --- not from the
         word's own letters --- but from the point/probability sums of all
         the possible _derivative_ scrabble-legal words (its "catches").
    """

    # Raw data from the official scrabble lists (can be downloaded from
    # Hasbro): the tile alphabet, how many of each tile the bag holds, and
    # each tile's point value.  "_" is the blank tile.
    letters = "_ a b c d e f g h i j k l m n o p q r s t u v w x y z".split()
    frequencies = (2, 9, 2, 2, 4, 12, 2, 3, 2, 9, 1, 1, 4, 2, 6, 8, 2, 1, 6, 4, 6, 4, 2, 2, 1, 2, 1)
    points = (0, 1, 3, 3, 2, 1, 4, 2, 4, 1, 8, 5, 1, 3, 1, 1, 3, 10, 1, 1, 1, 1, 4, 4, 8, 4, 10)
    letter_frequencies = dict(zip(letters, frequencies))
    # BUG FIX: was dict(zip(letters, frequencies)) -- a copy/paste error
    # that made every tile's point value equal to its bag frequency.
    letter_points = dict(zip(letters, points))

    def __init__(self, name, points=None, frequency=None, catches=None, hangs=None):
        self.name = name
        # BUG FIX: the original tests were inverted ("if x is None:
        # self.x = x"), so real data was dropped and attributes were only
        # ever assigned the useless value None.  Assign only when a value
        # was actually supplied; note that assigning self.hangs shadows
        # the hangs() method on that instance (pre-existing design quirk).
        if catches is not None: self.catches = catches
        if frequency is not None: self.frequency = frequency
        if points is not None: self.points = points
        if hangs is not None: self.hangs = hangs

    def calculate_probability(self):
        """Sum of tile point values over the letters of self.catches.

        BUG FIX: referenced the bare name letter_points (NameError --
        it is a class attribute).  Non-tile characters (the spaces that
        separate catch words) are skipped instead of raising KeyError.
        """
        return sum(self.letter_points[c] for c in self.catches
                   if c in self.letter_points)

    def calculate_frequencies(self):
        """Sum of tile bag frequencies over the letters of self.catches.

        Same NameError / space-separator fixes as calculate_probability().
        """
        return sum(self.letter_frequencies[c] for c in self.catches
                   if c in self.letter_frequencies)

    @staticmethod
    def count(finder_sql=""):
        """Rails-style finder: row count for an optional WHERE clause.

        NOTE(review): finder_sql is interpolated verbatim -- callers must
        never pass untrusted text here.
        """
        cur.execute("select * from words {0}".format(finder_sql))
        return cur.rowcount

    def hangs(self):
        """The one-letter-shorter word (final letter dropped)."""
        return self.name[0:-1]

    @staticmethod
    def find_or_create_all_by_name(names):
        """Find existing rows for *names*, creating any that are missing.

        Returns created Word objects followed by the pre-existing matches.
        Names that fail validation (not in the local word lists) are
        silently skipped.
        """
        # MYTODO escape names -- str.format of a Python tuple is
        # SQL-injectable and produces invalid SQL for a 1-element tuple.
        matches = Word.find_all("""where words.name in {0}""".format(tuple(names)))
        unmatched = set(names) - set(w.name for w in matches)
        # BUG FIX: removed a leftover pdb.set_trace() (pdb was never
        # imported, so it raised NameError in production).
        # MYTODO: transactions?
        invalid_words = []
        created_words = []
        for n in unmatched:
            w = Word(n)
            try:
                w.new()
                created_words.append(w)
            except NameError:
                invalid_words.append(n)
        # MYTODO: hose invalid_words over to the output somehow ...
        # through a logger, if nothing else
        if created_words:
            db.commit()
        # BUG FIX: list.extend returns None, so the original
        # "created_words.extend(matches) or []" always returned [].
        return created_words + matches

    def new(self):
        """Vaguely rails-AR-like new().

        Validates, find-greps for catches, and pre-commits this instance
        to the db (the caller commits).  Raises NameError when the word is
        not in the local word lists.
        #MYTODO: profiling. Is it worth it to split up the two grep searches?
        """
        self.validate_against_local_lists()
        grepd_catches = self.fgrep_catches_in_directories(("./words",))
        flat_catches = []
        for c in grepd_catches:
            flat_catches.extend(c)
        self.catches = " ".join(set(flat_catches))
        # BUG FIX: use a parameterized query -- the old str.format pasted a
        # Python tuple repr into the SQL and was injectable.
        cur.execute(
            "INSERT INTO words VALUES (%s, %s, %s, %s, %s)",
            (
                self.name,
                self.calculate_probability(),
                self.calculate_frequencies(),
                self.catches,
                # hangs: drop-first-letter and drop-last-letter variants
                self.name[1:] + " " + self.name[:-1],
            ),
        )

    def validate_against_local_lists(self, lists=(".",)):
        """Raise NameError unless the word appears in a local .txt list.

        This also catches all the weird things people might throw, like
        numbers.
        """
        if [self.name] not in self.fgrep_in_directories(lists):
            # BUG FIX: was the Python-2-only "raise NameError, msg" form.
            raise NameError("not in ./words/*.txt. Look again, shall we?")

    def fgrep_in_directories(self, directories=(".",), search_string=None):
        """Grep local .txt files under each directory.

        With no search_string, looks for this word as a whole line
        (case-insensitive); otherwise uses search_string as a multi-line
        regex.  Returns a list of per-directory match lists.
        """
        if search_string is None:
            search_tuple = (("^{0}$".format(self.name), re.I),)
        else:
            search_tuple = ((search_string, re.M),)
        result = [
            list(SU.ffindgrep(directory,
                              namefs=(lambda s: s.endswith('.txt'),),
                              regexl=search_tuple).values())
            for directory in directories
        ]
        # BUG FIX: was "len(catch) is not 0" -- identity comparison on an
        # int, which is implementation-defined; use a real comparison.
        return [catch[0] for catch in result if len(catch) != 0]

    def fgrep_catches_in_directories(self, directories=(".",)):
        """Find all _catches_: words one letter longer at either end.

        BUG FIX: the original ignored its *directories* parameter and
        always searched ("./words",).
        """
        temp = []
        temp.extend(self.fgrep_in_directories(directories, "^{0}.$".format(self.name)))
        temp.extend(self.fgrep_in_directories(directories, "^.{0}$".format(self.name)))
        return temp

    @staticmethod
    def find_all(finder_sql=""):
        """Rails-style finder: Word objects for an optional WHERE clause.

        NOTE(review): finder_sql is interpolated verbatim, same caveat as
        count().
        """
        cur.execute("select * from words {0}".format(finder_sql))
        return [Word(*properties) for properties in cur.fetchall()]
def flatten(l):
    """Return a flat list of the leaves of arbitrarily nested lists.

    BUG FIX: the original compared "l is []" (always False -- [] is a
    fresh object, so identity never matches), summed sublists with no []
    start value (TypeError on lists, arithmetic sums on numbers), and
    returned a bare scalar for non-list leaves.
    """
    if not isinstance(l, list):
        return [l]
    flat = []
    for item in l:
        flat.extend(flatten(item))
    return flat
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment