Skip to content

Instantly share code, notes, and snippets.

@judotens
Last active June 19, 2021 17:38
Show Gist options
  • Save judotens/21a4f73e97fa39401646 to your computer and use it in GitHub Desktop.
Save judotens/21a4f73e97fa39401646 to your computer and use it in GitHub Desktop.
Crosswords Generator
Best paired with this JavaScript library:
http://www.jesseweisbeck.com/crossword/
Usage: crossword.py [options]
Options:
-h, --help show this help message and exit
-r ROWS, --rows=ROWS Set row height
-c COLUMNS, --columns=COLUMNS
Set column width
-i INPUTFILE, --inputfile=INPUTFILE
Set questions file. Content formatted with text
format. -> <first word = answer><space><next words =
question>
-j INPUTJSON, --inputjson=INPUTJSON
Set questions on Json list format. [[<answer>,
<question]]
-t TIMEOUT, --timeout=TIMEOUT
Set maximum timeout
-o OUTPUT, --output=OUTPUT
Write output to file
sample output:
[{"startx": 1, "starty": 1, "orientation": "across", "clue": "Singa Terbang", "answer": "lionair", "position": 1}, {"startx": 4, "starty": 2, "orientation": "down", "clue": "Menangkap penjahat", "answer": "polisi", "position": 2}, {"startx": 1, "starty": 7, "orientation": "down", "clue": "Kebun Binatang", "answer": "ragunan", "position": 3}, {"startx": 6, "starty": 4, "orientation": "across", "clue": "Pasangan ibu", "answer": "bapak", "position": 4}, {"startx": 5, "starty": 5, "orientation": "down", "clue": "Menara Eifel", "answer": "paris", "position": 5}, {"startx": 4, "starty": 6, "orientation": "across", "clue": "Tempat tinggal", "answer": "rumah", "position": 6}, {"startx": 9, "starty": 1, "orientation": "across", "clue": "Hewan Pengerat", "answer": "tikus", "position": 7}, {"startx": 1, "starty": 9, "orientation": "down", "clue": "Wadah air", "answer": "gelas", "position": 8}, {"startx": 2, "starty": 9, "orientation": "across", "clue": "Udang dikeringin", "answer": "ebi", "position": 9}]
import random, re, time, string
from copy import copy as duplicate
import optparse
# optional, speeds up by a factor of 4
#import psyco
#psyco.full()
class Crossword(object):
    """Randomised crossword layout generator.

    Coordinates are 1-based ``(col, row)`` pairs; ``grid[row - 1][col - 1]``
    holds either a placed letter or the ``empty`` marker.  Word objects are
    expected to expose ``word``, ``length``, ``clue``, ``col``, ``row``,
    ``vertical``, ``number`` and ``down_across()``.
    """

    def __init__(self, cols, rows, empty='-', maxloops=2000, available_words=None):
        """Create an empty ``cols`` x ``rows`` grid.

        ``available_words`` is a list of Word objects or ``[answer, clue]``
        pairs.  ``maxloops`` bounds the number of placement attempts made
        per word in ``fit_and_add``.
        """
        self.cols = cols
        self.rows = rows
        self.empty = empty
        self.maxloops = maxloops
        # None instead of a shared mutable default list.
        self.available_words = [] if available_words is None else available_words
        self.randomize_word_list()
        self.current_word_list = []
        self.debug = 0
        self.clear_grid()

    def clear_grid(self):
        """Reset the grid so every cell holds the empty marker."""
        self.grid = [[self.empty] * self.cols for _ in range(self.rows)]

    def randomize_word_list(self):
        """Rebuild ``available_words`` as fresh Word objects, longest first.

        The list is shuffled before the (stable) length sort so that words
        of equal length come out in random order.
        """
        temp_list = []
        for word in self.available_words:
            if isinstance(word, Word):
                temp_list.append(Word(word.word, word.clue))
            else:
                temp_list.append(Word(word[0], word[1]))
        random.shuffle(temp_list)
        temp_list.sort(key=lambda item: len(item.word), reverse=True)
        self.available_words = temp_list

    def compute_crossword(self, time_permitted=1.00, spins=2):
        """Search for the layout that places the most words.

        Random layouts are generated on a scratch board for roughly
        ``time_permitted`` seconds (at least one is always tried); each
        layout makes ``spins`` passes over the word list.  The best result
        is stored in ``self.current_word_list`` and ``self.grid``.
        """
        time_permitted = float(time_permitted)
        count = 0
        candidate = Crossword(self.cols, self.rows, self.empty, self.maxloops,
                              self.available_words)
        start_full = float(time.time())
        while (float(time.time()) - start_full) < time_permitted or count == 0:
            self.debug += 1
            candidate.current_word_list = []
            candidate.clear_grid()
            candidate.randomize_word_list()
            for _ in range(spins):
                for word in candidate.available_words:
                    if word not in candidate.current_word_list:
                        candidate.fit_and_add(word)
            # Keep the candidate only if it places strictly more words.
            if len(candidate.current_word_list) > len(self.current_word_list):
                self.current_word_list = candidate.current_word_list
                self.grid = candidate.grid
            count += 1
            # Nothing can beat a layout that already holds every word, so
            # stop instead of burning the rest of the time budget.
            if len(self.current_word_list) >= len(candidate.available_words):
                break
        return

    def suggest_coord(self, word):
        """Return scored start positions where ``word`` crosses a placed letter.

        Each entry is ``[col, row, vertical, col + row, score]``; entries
        whose score is 0 are dropped and the rest are sorted best first.
        """
        coordlist = []
        for offset, given_letter in enumerate(word.word):
            for rowc, row in enumerate(self.grid, 1):
                for colc, cell in enumerate(row, 1):
                    if given_letter != cell:
                        continue
                    # Vertical placement: the word occupies rows
                    # start_row .. start_row + length - 1.
                    start_row = rowc - offset
                    if start_row > 0 and start_row + word.length <= self.rows + 1:
                        coordlist.append([colc, start_row, 1, colc + start_row, 0])
                    # Horizontal placement, same reasoning on columns.
                    start_col = colc - offset
                    if start_col > 0 and start_col + word.length <= self.cols + 1:
                        coordlist.append([start_col, rowc, 0, rowc + start_col, 0])
        return self.sort_coordlist(coordlist, word)

    def sort_coordlist(self, coordlist, word):
        """Score every coordinate, drop the zero scores and sort best first."""
        new_coordlist = []
        for coord in coordlist:
            col, row, vertical = coord[0], coord[1], coord[2]
            coord[4] = self.check_fit_score(col, row, vertical, word)
            if coord[4]:
                new_coordlist.append(coord)
        # Shuffle first so equally scored positions are picked at random.
        random.shuffle(new_coordlist)
        new_coordlist.sort(key=lambda item: item[4], reverse=True)
        return new_coordlist

    def fit_and_add(self, word):
        """Place ``word`` on the board if a valid position exists.

        The first word is seeded at the top-left corner in a random
        orientation (falling back to the other orientation if the first
        does not fit).  Later words take the best-scoring crossing position
        from ``suggest_coord``.  At most ``maxloops`` positions are tried.
        """
        if not self.current_word_list:
            # Top-left seeding of the longest word gave the best results;
            # centred or fully random seeds were slower and placed fewer words.
            first = random.randrange(0, 2)
            for attempt, vertical in enumerate((first, 1 - first)):
                if attempt >= self.maxloops:
                    break
                if self.check_fit_score(1, 1, vertical, word):
                    self.set_word(1, 1, vertical, word, force=True)
                    break
            return
        coordlist = self.suggest_coord(word)
        for coord in coordlist[:max(self.maxloops, 0)]:
            if coord[4]:  # already filtered by sort_coordlist, but double check
                self.set_word(coord[0], coord[1], coord[2], word, force=True)
                return
        return

    def check_fit_score(self, col, row, vertical, word):
        """Score placing ``word`` starting at ``(col, row)``.

        Returns 0 when the word does not fit, 1 for a fit without crossings,
        and one extra point per crossing with an already placed letter.
        """
        if col < 1 or row < 1:
            return 0
        score = 1
        last = len(word.word)
        for count, letter in enumerate(word.word, 1):
            try:
                active_cell = self.get_cell(col, row)
            except IndexError:
                return 0  # the word runs off the grid
            crossing = active_cell == letter
            if not crossing and active_cell != self.empty:
                return 0  # collides with a different letter
            if crossing:
                score += 1
            if vertical:
                sides = ((col + 1, row), (col - 1, row))
                before, after = (col, row - 1), (col, row + 1)
            else:
                sides = ((col, row - 1), (col, row + 1))
                before, after = (col - 1, row), (col + 1, row)
            # Side neighbours only matter away from a crossing point.
            if not crossing:
                for side in sides:
                    if not self.check_if_cell_clear(side[0], side[1]):
                        return 0
            # The cell just before the first letter and just after the last
            # one must be free so the word does not run into another word.
            if count == 1 and not self.check_if_cell_clear(before[0], before[1]):
                return 0
            if count == last and not self.check_if_cell_clear(after[0], after[1]):
                return 0
            if vertical:
                row += 1
            else:
                col += 1
        return score

    def set_word(self, col, row, vertical, word, force=False):
        """Write ``word`` into the grid starting at ``(col, row)``.

        With ``force`` set, the position is also recorded on the word and
        the word is appended to ``current_word_list``.
        """
        if force:
            word.col = col
            word.row = row
            word.vertical = vertical
            self.current_word_list.append(word)
        for letter in word.word:
            self.set_cell(col, row, letter)
            if vertical:
                row += 1
            else:
                col += 1
        return

    def set_cell(self, col, row, value):
        """Store ``value`` at the 1-based ``(col, row)`` position."""
        self.grid[row - 1][col - 1] = value

    def get_cell(self, col, row):
        """Return the content of the 1-based ``(col, row)`` position."""
        return self.grid[row - 1][col - 1]

    def check_if_cell_clear(self, col, row):
        """Return True when ``(col, row)`` holds no letter.

        Positions outside the grid count as clear: there is nothing there
        to collide with.  The explicit lower-bound test also prevents
        ``col == 0`` / ``row == 0`` from wrapping around to the opposite
        edge through negative list indexing.
        """
        if col < 1 or row < 1:
            return True
        try:
            return self.get_cell(col, row) == self.empty
        except IndexError:
            return True

    def solution(self):
        """Return the solved grid as text, one row per line."""
        lines = []
        for r in range(self.rows):
            lines.append(''.join('%s ' % c for c in self.grid[r]) + '\n')
        return ''.join(lines)

    def word_find(self):
        """Return the grid with every empty cell filled by a random letter."""
        lines = []
        for r in range(self.rows):
            cells = []
            for c in self.grid[r]:
                if c == self.empty:
                    # ascii_lowercase exists on Python 2 and 3 and, unlike
                    # string.lowercase, does not depend on the locale.
                    c = random.choice(string.ascii_lowercase)
                cells.append('%s ' % c)
            lines.append(''.join(cells) + '\n')
        return ''.join(lines)

    def order_number_words(self):
        """Sort the placed words and assign clue numbers.

        Words are ordered by ``col + row``; the column is used as a
        tie-breaker so that words starting on the same cell end up adjacent
        and therefore share one number.
        """
        self.current_word_list.sort(key=lambda item: (item.col + item.row, item.col))
        count = 1
        total = len(self.current_word_list)
        for index, word in enumerate(self.current_word_list):
            word.number = count
            if index + 1 < total:
                following = self.current_word_list[index + 1]
                if word.col != following.col or word.row != following.row:
                    count += 1

    def display(self, order=True):
        """Return the blank puzzle: clue numbers shown, letters hidden.

        When ``order`` is true the words are (re)numbered first.  The
        numbers are drawn on a copy of the grid so the solution stored in
        ``self.grid`` is left untouched.
        """
        if order:
            self.order_number_words()
        numbered = [list(row) for row in self.grid]
        for word in self.current_word_list:
            numbered[word.row - 1][word.col - 1] = word.number
        lines = []
        for r in range(self.rows):
            lines.append(''.join('%s ' % c for c in numbered[r]) + '\n')
        return re.sub(r'[a-z]', ' ', ''.join(lines))

    def word_bank(self):
        """Return the placed answers, one per line, in random order."""
        temp_list = list(self.current_word_list)
        random.shuffle(temp_list)
        return ''.join('%s\n' % word.word for word in temp_list)

    def legend(self):
        """Return one clue line per placed word; words must be numbered first."""
        return ''.join(
            '%d. (%d,%d) %s: %s\n' % (word.number, word.col, word.row,
                                      word.down_across(), word.clue)
            for word in self.current_word_list
        )
class Word(object):
    """A crossword entry: the answer, its clue and its board position."""

    def __init__(self, word=None, clue=None):
        """Normalise ``word`` to lower case with all whitespace removed.

        ``word=None`` (the declared default) is treated as an empty answer
        instead of failing on ``None.lower()``.
        """
        text = '' if word is None else word
        self.word = re.sub(r'\s', '', text.lower())
        self.clue = clue
        self.length = len(self.word)
        # the below are set when placed on board
        self.row = None
        self.col = None
        self.vertical = None
        self.number = None

    def down_across(self):
        """Return 'down' for a vertical word, 'across' otherwise."""
        return 'down' if self.vertical else 'across'

    def __repr__(self):
        return self.word
### end class, start execution
def render_js(crossw):
    """Compute a layout and return it as a list of clue dictionaries.

    Runs ``crossw.compute_crossword(2)`` and ``crossw.order_number_words()``,
    then emits one dict per placed word with the keys ``clue``, ``answer``,
    ``position``, ``orientation``, ``startx`` and ``starty``.

    ``startx`` is the word's column and ``starty`` its row, so the
    coordinates agree with ``orientation`` ('across' advances along x,
    'down' along y).
    """
    crossw.compute_crossword(2)
    crossw.order_number_words()
    return [
        {
            'clue': word.clue,
            'answer': word.word,
            'position': word.number,
            'orientation': word.down_across(),
            'startx': word.col,
            'starty': word.row,
        }
        for word in crossw.current_word_list
    ]
if __name__ == "__main__":
    # Command-line entry point: read the questions, build a crossword and
    # print (and optionally save) the layout as JSON.
    import json
    import sys

    # sample inputjson:
    # [["ebi","Udang dikeringin"],["rumah","Tempat tinggal"],["bapak","Pasangan ibu"],["gelas","Wadah air"],["tikus","Hewan Pengerat"],["paris","Menara Eifel"],["ragunan","Kebun Binatang"],["lion air","Singa Terbang"],["polisi","Menangkap penjahat"]]
    #
    # sample inputfile:
    # boomee Portal media menyajikan isu Indonesia
    # iPhone Perangkat gadget canggih dalam genggaman
    # kambing Hewan yang dipotong saat kurban
    # Sapi Hewan yang susunya diperah
    # Lebaran Hari besar umat islam setelah berpuasa
    # Natal Hari besar umat Kristiani pada akhir tahun
    # macet Permasalahan klasik lalulintas di kota besar
    # angpao Titipan uang yang diberikan pada perayaan hari besar
    # Jokowi Presiden Indonesia saat ini
    # MKD Makhamah Kehormatan Dewan
    # Gojek Ojek pesan online
    # Kaskus Forum komunitas terbesar Indonesia
    # FJB Subforum Kaskus untuk melakukan Jual / Beli

    parser = optparse.OptionParser()
    parser.add_option('-r', '--rows', dest='rows', help='Set row height')
    parser.add_option('-c', '--columns', dest='columns', help='Set column width')
    parser.add_option('-i', '--inputfile', dest='inputfile', help='Set questions file. Content formatted with text format. -> <first word = answer><space><next words = question>')
    parser.add_option('-j', '--inputjson', dest='inputjson', help='Set questions on Json list format. [[<answer>, <question]]')
    parser.add_option('-t', '--timeout', dest='timeout', help='Set maximum timeout')
    parser.add_option('-o', '--output', dest='output', help='Write output to file')
    opts, args = parser.parse_args()

    opts.rows = int(opts.rows) if opts.rows else 13
    opts.columns = int(opts.columns) if opts.columns else 13
    opts.timeout = int(opts.timeout) if opts.timeout else 2000

    if not opts.inputfile and not opts.inputjson:
        parser.print_help()
        sys.exit(-1)

    word_list = []
    if opts.inputfile:
        with open(opts.inputfile) as source:
            for line in source.read().split("\n"):
                # First token is the answer, the rest of the line is the
                # clue (the whole rest, including its last word).
                parts = line.strip().split(None, 1)
                if not parts:
                    continue  # skip blank lines instead of adding empty answers
                clue = parts[1].strip() if len(parts) > 1 else ""
                word_list.append([parts[0], clue])
    # When both inputs are given, the JSON list takes precedence.
    if opts.inputjson:
        word_list = json.loads(opts.inputjson)

    # Crossword takes (cols, rows, ...).
    # NOTE(review): --timeout is forwarded as Crossword's `maxloops`, not as
    # a time limit; render_js always searches for a fixed 2 seconds.
    tts = Crossword(opts.columns, opts.rows, '-', opts.timeout, word_list)
    rendered = render_js(tts)
    dumped = json.dumps(rendered)
    if opts.output:
        with open(opts.output, "w") as target:
            target.write(dumped)
    print(dumped)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment