Created
September 17, 2009 15:35
-
-
Save jdar/188544 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## words/word.py | |
import re
import sys

import psycopg2
from psycopg2.extras import DictCursor
from psycopg2.extensions import adapt

import scriptutil as SU
# Open the module-wide database connection and cursor used by the Word
# class below.  If the database is unreachable we cannot do anything
# useful, so report and exit.
try:
    db = psycopg2.connect(database="scrabble", user="python", password="python")
    cur = db.cursor(cursor_factory=DictCursor)
    # cur.execute("CREATE TABLE words (name varchar, probability int, frequency int, catches varchar, hangs varchar);")
except psycopg2.Error:
    # BUG FIX: was a bare "except:" hiding the real error, a Python-2
    # print statement, and "sys.ext()" (a typo that raised AttributeError;
    # sys was never imported either).
    print("I am unable to connect to the database")
    sys.exit(1)

# NOTE(review): the original also had a second try/except whose bodies were
# two bare string expressions ("trying to find a wordlist reference file") —
# pure no-ops with an unreachable handler — removed as dead code.
class Word:
    """A legal scrabble word.

    Legal words are
      1) present in the official word lists (./words/*.txt), and
      2) scored with point/frequency attributes derived --- not from the
         word's own letters --- but from the point/probability sums of all
         the possible _derivative_ scrabble-legal words (its "catches").
    """

    # Raw data from the official scrabble lists (can be downloaded from
    # Hasbro): the tile alphabet, how many of each tile the bag holds, and
    # each tile's point value.  "_" is the blank tile.
    letters = "_ a b c d e f g h i j k l m n o p q r s t u v w x y z".split()
    frequencies = (2, 9, 2, 2, 4, 12, 2, 3, 2, 9, 1, 1, 4, 2, 6, 8, 2, 1, 6, 4, 6, 4, 2, 2, 1, 2, 1)
    points = (0, 1, 3, 3, 2, 1, 4, 2, 4, 1, 8, 5, 1, 3, 1, 1, 3, 10, 1, 1, 1, 1, 4, 4, 8, 4, 10)
    letter_frequencies = dict(zip(letters, frequencies))
    # BUG FIX: was dict(zip(letters, frequencies)) -- a copy/paste error
    # that made every tile's point value equal to its bag frequency.
    letter_points = dict(zip(letters, points))

    def __init__(self, name, points=None, frequency=None, catches=None, hangs=None):
        self.name = name
        # BUG FIX: the original tests were inverted ("if x is None:
        # self.x = x"), so real data was dropped and attributes were only
        # ever assigned the useless value None.  Assign only when a value
        # was actually supplied; note that assigning self.hangs shadows
        # the hangs() method on that instance (pre-existing design quirk).
        if catches is not None: self.catches = catches
        if frequency is not None: self.frequency = frequency
        if points is not None: self.points = points
        if hangs is not None: self.hangs = hangs

    def calculate_probability(self):
        """Sum of tile point values over the letters of self.catches.

        BUG FIX: referenced the bare name letter_points (NameError --
        it is a class attribute).  Non-tile characters (the spaces that
        separate catch words) are skipped instead of raising KeyError.
        """
        return sum(self.letter_points[c] for c in self.catches
                   if c in self.letter_points)

    def calculate_frequencies(self):
        """Sum of tile bag frequencies over the letters of self.catches.

        Same NameError / space-separator fixes as calculate_probability().
        """
        return sum(self.letter_frequencies[c] for c in self.catches
                   if c in self.letter_frequencies)

    @staticmethod
    def count(finder_sql=""):
        """Rails-style finder: row count for an optional WHERE clause.

        NOTE(review): finder_sql is interpolated verbatim -- callers must
        never pass untrusted text here.
        """
        cur.execute("select * from words {0}".format(finder_sql))
        return cur.rowcount

    def hangs(self):
        """The one-letter-shorter word (final letter dropped)."""
        return self.name[0:-1]

    @staticmethod
    def find_or_create_all_by_name(names):
        """Find existing rows for *names*, creating any that are missing.

        Returns created Word objects followed by the pre-existing matches.
        Names that fail validation (not in the local word lists) are
        silently skipped.
        """
        # MYTODO escape names -- str.format of a Python tuple is
        # SQL-injectable and produces invalid SQL for a 1-element tuple.
        matches = Word.find_all("""where words.name in {0}""".format(tuple(names)))
        unmatched = set(names) - set(w.name for w in matches)
        # BUG FIX: removed a leftover pdb.set_trace() (pdb was never
        # imported, so it raised NameError in production).
        # MYTODO: transactions?
        invalid_words = []
        created_words = []
        for n in unmatched:
            w = Word(n)
            try:
                w.new()
                created_words.append(w)
            except NameError:
                invalid_words.append(n)
        # MYTODO: hose invalid_words over to the output somehow ...
        # through a logger, if nothing else
        if created_words:
            db.commit()
        # BUG FIX: list.extend returns None, so the original
        # "created_words.extend(matches) or []" always returned [].
        return created_words + matches

    def new(self):
        """Vaguely rails-AR-like new().

        Validates, find-greps for catches, and pre-commits this instance
        to the db (the caller commits).  Raises NameError when the word is
        not in the local word lists.
        #MYTODO: profiling. Is it worth it to split up the two grep searches?
        """
        self.validate_against_local_lists()
        grepd_catches = self.fgrep_catches_in_directories(("./words",))
        flat_catches = []
        for c in grepd_catches:
            flat_catches.extend(c)
        self.catches = " ".join(set(flat_catches))
        # BUG FIX: use a parameterized query -- the old str.format pasted a
        # Python tuple repr into the SQL and was injectable.
        cur.execute(
            "INSERT INTO words VALUES (%s, %s, %s, %s, %s)",
            (
                self.name,
                self.calculate_probability(),
                self.calculate_frequencies(),
                self.catches,
                # hangs: drop-first-letter and drop-last-letter variants
                self.name[1:] + " " + self.name[:-1],
            ),
        )

    def validate_against_local_lists(self, lists=(".",)):
        """Raise NameError unless the word appears in a local .txt list.

        This also catches all the weird things people might throw, like
        numbers.
        """
        if [self.name] not in self.fgrep_in_directories(lists):
            # BUG FIX: was the Python-2-only "raise NameError, msg" form.
            raise NameError("not in ./words/*.txt. Look again, shall we?")

    def fgrep_in_directories(self, directories=(".",), search_string=None):
        """Grep local .txt files under each directory.

        With no search_string, looks for this word as a whole line
        (case-insensitive); otherwise uses search_string as a multi-line
        regex.  Returns a list of per-directory match lists.
        """
        if search_string is None:
            search_tuple = (("^{0}$".format(self.name), re.I),)
        else:
            search_tuple = ((search_string, re.M),)
        result = [
            list(SU.ffindgrep(directory,
                              namefs=(lambda s: s.endswith('.txt'),),
                              regexl=search_tuple).values())
            for directory in directories
        ]
        # BUG FIX: was "len(catch) is not 0" -- identity comparison on an
        # int, which is implementation-defined; use a real comparison.
        return [catch[0] for catch in result if len(catch) != 0]

    def fgrep_catches_in_directories(self, directories=(".",)):
        """Find all _catches_: words one letter longer at either end.

        BUG FIX: the original ignored its *directories* parameter and
        always searched ("./words",).
        """
        temp = []
        temp.extend(self.fgrep_in_directories(directories, "^{0}.$".format(self.name)))
        temp.extend(self.fgrep_in_directories(directories, "^.{0}$".format(self.name)))
        return temp

    @staticmethod
    def find_all(finder_sql=""):
        """Rails-style finder: Word objects for an optional WHERE clause.

        NOTE(review): finder_sql is interpolated verbatim, same caveat as
        count().
        """
        cur.execute("select * from words {0}".format(finder_sql))
        return [Word(*properties) for properties in cur.fetchall()]
def flatten(l):
    """Return a flat list of the leaves of arbitrarily nested lists.

    BUG FIX: the original compared "l is []" (always False -- [] is a
    fresh object, so identity never matches), summed sublists with no []
    start value (TypeError on lists, arithmetic sums on numbers), and
    returned a bare scalar for non-list leaves.
    """
    if not isinstance(l, list):
        return [l]
    flat = []
    for item in l:
        flat.extend(flatten(item))
    return flat
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment