wesleywerner · February 18, 2018 08:24
diff --git a/Godville Crossword Solver b/Godville Crossword Solver
 '''
    Godville crossword solver by Goddess Shadowed Princess

    Copyleft 2018, licensed under GPL 3

    Requirements: python, html2text (pip install html2text)

    Version 1
 '''

 # When True, print diagnostic lines (prefixed with "#") while loading and searching
 DEBUG = False

 # Version number shown in the startup banner
 VERSION = 1

 # File extension of the database files this script writes and reads
 extension = ".gcs"

 # system exit function
 import sys

 # for regular expression searching
 import re

 # for downloading web content
 import urllib2

 # for extracting text from html
 try:
    import html2text
 except ImportError:
    print("Looks like you are missing some dependencies. Please pip install html2text")
    raw_input("Press Enter to continue...")
    sys.exit()

 # for listing of files
 from os import listdir

 # get this script's path
 from os import path
 scriptPath = path.dirname(path.realpath(__file__))

 # Module-level database state; both stay None until loadDatabase() fills them.
 # dbfiles: names of the database files found in scriptPath
 # dbdata: one list of text lines per entry in dbfiles, in the same order
 dbfiles = None
 dbdata = None

 def loadDatabase():
    '''
    Load every database file found next to this script into memory.

    Rebuilds two module globals:
      dbfiles - names of the files in scriptPath ending with `extension`
      dbdata  - one list of text lines per entry in dbfiles, same order

    Only lines containing a "|" character are kept, cut off just before
    the first "|". Returns nothing.
    '''

    # Only load lines that contain a PIPE character
    # and only the text up to the first pipe occurrence
    ONLY_PIPED_LINES = True

    global dbfiles
    global dbdata

    # list of db files
    dbfiles = [f for f in listdir(scriptPath) if f.endswith(extension)]

    if DEBUG:
        print("# List of files found:")
        for n in dbfiles:
            print("\t%s" % (n))

    # load file contents
    dbdata = []
    linecount = 0
    for dbf in dbfiles:
        # The names come from listdir(scriptPath), so they must be opened
        # relative to scriptPath too; a bare name would be resolved against
        # the current working directory instead.
        with open(path.join(scriptPath, dbf), "r") as dbptr:
            lines = dbptr.readlines()
        linecount = linecount + len(lines)
        if ONLY_PIPED_LINES:
            dbdata.append([oneline.partition("|")[0]
                           for oneline in lines if "|" in oneline])
        else:
            # take every line in the file
            dbdata.append(lines)

    if DEBUG:
        print("# %d lines of text loaded" % (linecount))

 def extractURL(url):
    '''
    Download one wiki page and store its plain text as a database file.

    url: address of the page. The text after the last "/" is used as the
         page name; the output file is that name plus `extension`,
         written into scriptPath.

    Returns nothing. Download and file errors are not caught here.
    '''

    # the page name is whatever follows the last slash
    pagename = url.rstrip("/").rsplit("/", 1)[-1]

    print("\t* Fetching " + pagename)

    # get page content; close the connection even if reading fails
    response = urllib2.urlopen(url)
    try:
        webContent = response.read().decode('utf8')
    finally:
        response.close()

    # extract text
    converter = html2text.HTML2Text()
    converter.ignore_links = True
    converter.ignore_images = True

    plaintext = converter.handle(webContent)

    # Write next to the script: loadDatabase() lists scriptPath, so a file
    # written to the current working directory could go unnoticed.
    with open(path.join(scriptPath, pagename + extension), "w") as fileptr:
        fileptr.write(plaintext.encode('utf8'))

 def downloadPages():
    '''
    Fetch every wiki list page, then reload the database.

    Each page is handed to extractURL(). A page that fails with an IOError
    (urllib2.URLError is a subclass of it) is reported and skipped, so one
    bad download no longer ends the whole program and the remaining pages
    are still fetched. loadDatabase() always runs afterwards.
    Returns nothing.
    '''

    print("Fetching pages from the wiki, this shouldn't take long...")

    # list of pages to get
    pages = (
        "https://wiki.godvillegame.com/List_of_Artifacts",
        "https://wiki.godvillegame.com/List_of_Equipment",
        "https://wiki.godvillegame.com/List_of_Monsters",
        "https://wiki.godvillegame.com/List_of_Quests",
        "https://wiki.godvillegame.com/List_of_Skills",
        "https://wiki.godvillegame.com/Milestones"
        )

    # process each page, reporting failures instead of crashing
    for url in pages:
        try:
            extractURL(url)
        except IOError as err:
            print("\t! Could not fetch %s (%s)" % (url, err))

    loadDatabase()

 def searchquery(query):
    '''
    Print every loaded database line that matches `query`.

    query: a regular expression, matched case-insensitively anywhere in
           the line; "." stands for any single character.

    Matching lines are printed with a leading tab. If `query` is not a
    valid regular expression, a message is printed and nothing is searched.
    Returns nothing.
    '''

    # case insensitive match; the text comes straight from the user, so it
    # may not be a valid pattern
    try:
        pattern = re.compile(query, re.IGNORECASE)
    except re.error as err:
        print("\tInvalid search pattern: %s" % (err))
        return

    # dbfiles and dbdata are parallel lists, so walk them together
    for filename, lines in zip(dbfiles, dbdata):

        if DEBUG:
            print("# file %s" % (filename))

        for line in lines:
            if pattern.search(line):
                print("\t%s" % (line))

 def startup():
    '''
    Prepare the database and greet the user.

    Loads the database files; when none are found, the pages are
    downloaded first. Then prints the name, licence and help hint.
    '''

    # auto download if pages are missing
    loadDatabase()
    if not dbfiles:
        downloadPages()

    # print useful words
    banner = (
        "Godville crossword solver by Goddess Shadowed Princess (version %d)" % (VERSION),
        "Copyleft 2018, licensed under GPL 3",
        "(enter HELP for help)",
    )
    for text in banner:
        print(text)

 def printhelp():
    '''Print the usage instructions, one line of text per entry.'''
    helptext = (
        "",
        "Enter the word to search for, replacing any unknown letters with \".\"",
        "Example: \"..dden..g\" will find \"hidden agenda\".",
        "\nDOWNLOAD will fetch the latest lists from the wiki.",
        "Press ^C or ^D to quit",
    )
    for text in helptext:
        print(text)

 def menu():
    '''
    Run the interactive prompt until the user interrupts it.

    HELP and DOWNLOAD (in any letter case, surrounding spaces ignored) run
    printhelp() and downloadPages(); any other input is passed to
    searchquery() exactly as typed. The loop never returns normally; it
    ends when raw_input() raises KeyboardInterrupt or EOFError.
    '''
    while True:
        entered = raw_input("\nSearch: ")
        # Upper-case only the copy used to recognise commands. The search is
        # already case insensitive, and upper-casing the pattern itself would
        # change regex escapes (for example "\d" would become "\D").
        command = entered.strip().upper()
        if command == "HELP":
            printhelp()
        elif command == "DOWNLOAD":
            downloadPages()
        else:
            searchquery(entered)

 # diagnostics: show where the script thinks it lives
 if DEBUG:
    print("# Script path is %s" % (scriptPath))

 # load (or download) the database and show the banner
 startup()

 # prompt until the user presses ^C or ^D, then say goodbye
 try:
    menu()
 except (EOFError, KeyboardInterrupt):
    print("Goodbye o/")
	'''
	Godville crossword solver by Goddess Shadowed Princess

	Copyleft 2018, licensed under GPL 3

	Requirements: python, html2text (pip install html2text)

	Version 1
	'''

	# When True, print diagnostic lines (prefixed with "#") while loading and searching
	DEBUG = False

	# Version number shown in the startup banner
	VERSION = 1

	# File extension of the database files this script writes and reads
	extension = ".gcs"

	# system exit function
	import sys

	# for regular expression searching
	import re

	# for downloading web content
	import urllib2

	# for extracting text from html
	try:
	    import html2text
	except ImportError:
	    # html2text is a third-party package: say how to install it, wait for
	    # the user to read the message, then stop
	    print("Looks like you are missing some dependencies. Please pip install html2text")
	    raw_input("Press Enter to continue...")
	    sys.exit()

	# for listing of files
	from os import listdir

	# get this script's path
	from os import path
	scriptPath = path.dirname(path.realpath(__file__))

	# Module-level database state; both stay None until loadDatabase() fills them.
	# dbfiles: names of the database files found in scriptPath
	# dbdata: one list of text lines per entry in dbfiles, in the same order
	dbfiles = None
	dbdata = None

	def loadDatabase():
	    '''
	    Load every database file found next to this script into memory.

	    Rebuilds two module globals:
	      dbfiles - names of the files in scriptPath ending with `extension`
	      dbdata  - one list of text lines per entry in dbfiles, same order

	    Only lines containing a "|" character are kept, cut off just before
	    the first "|". Returns nothing.
	    '''

	    # Only load lines that contain a PIPE character
	    # and only the text up to the first pipe occurrence
	    ONLY_PIPED_LINES = True

	    global dbfiles
	    global dbdata

	    # list of db files
	    dbfiles = [f for f in listdir(scriptPath) if f.endswith(extension)]

	    if DEBUG:
	        print("# List of files found:")
	        for n in dbfiles:
	            print("\t%s" % (n))

	    # load file contents
	    dbdata = []
	    linecount = 0
	    for dbf in dbfiles:
	        # The names come from listdir(scriptPath), so they must be opened
	        # relative to scriptPath too; a bare name would be resolved against
	        # the current working directory instead.
	        with open(path.join(scriptPath, dbf), "r") as dbptr:
	            lines = dbptr.readlines()
	        linecount = linecount + len(lines)
	        if ONLY_PIPED_LINES:
	            # split on a plain "|"; a backslash-escaped pipe would be a
	            # two-character string that never occurs in the data
	            dbdata.append([oneline.partition("|")[0]
	                           for oneline in lines if "|" in oneline])
	        else:
	            # take every line in the file
	            dbdata.append(lines)

	    if DEBUG:
	        print("# %d lines of text loaded" % (linecount))

	def extractURL(url):
	    '''
	    Download one wiki page and store its plain text as a database file.

	    url: address of the page. The text after the last "/" is used as the
	         page name; the output file is that name plus `extension`,
	         written into scriptPath.

	    Returns nothing. Download and file errors are not caught here.
	    '''

	    # the page name is whatever follows the last slash
	    pagename = url.rstrip("/").rsplit("/", 1)[-1]

	    print("\t* Fetching " + pagename)

	    # get page content; close the connection even if reading fails
	    response = urllib2.urlopen(url)
	    try:
	        webContent = response.read().decode('utf8')
	    finally:
	        response.close()

	    # extract text
	    converter = html2text.HTML2Text()
	    converter.ignore_links = True
	    converter.ignore_images = True

	    plaintext = converter.handle(webContent)

	    # Write next to the script: loadDatabase() lists scriptPath, so a file
	    # written to the current working directory could go unnoticed.
	    with open(path.join(scriptPath, pagename + extension), "w") as fileptr:
	        fileptr.write(plaintext.encode('utf8'))

	def downloadPages():
	    '''
	    Fetch every wiki list page, then reload the database.

	    Each page is handed to extractURL(). A page that fails with an IOError
	    (urllib2.URLError is a subclass of it) is reported and skipped, so one
	    bad download does not end the whole program and the remaining pages
	    are still fetched. loadDatabase() always runs afterwards.
	    Returns nothing.
	    '''

	    print("Fetching pages from the wiki, this shouldn't take long...")

	    # list of pages to get
	    pages = (
	        "https://wiki.godvillegame.com/List_of_Artifacts",
	        "https://wiki.godvillegame.com/List_of_Equipment",
	        "https://wiki.godvillegame.com/List_of_Monsters",
	        "https://wiki.godvillegame.com/List_of_Quests",
	        "https://wiki.godvillegame.com/List_of_Skills",
	        "https://wiki.godvillegame.com/Milestones"
	        )

	    # process each page, reporting failures instead of crashing
	    for url in pages:
	        try:
	            extractURL(url)
	        except IOError as err:
	            print("\t! Could not fetch %s (%s)" % (url, err))

	    loadDatabase()

	def searchquery(query):
	    '''
	    Print every loaded database line that matches `query`.

	    query: a regular expression, matched case-insensitively anywhere in
	           the line; "." stands for any single character.

	    Matching lines are printed with a leading tab. If `query` is not a
	    valid regular expression, a message is printed and nothing is searched.
	    Returns nothing.
	    '''

	    # case insensitive match; the text comes straight from the user, so it
	    # may not be a valid pattern
	    try:
	        pattern = re.compile(query, re.IGNORECASE)
	    except re.error as err:
	        print("\tInvalid search pattern: %s" % (err))
	        return

	    # dbfiles and dbdata are parallel lists, so walk them together
	    for filename, lines in zip(dbfiles, dbdata):

	        if DEBUG:
	            print("# file %s" % (filename))

	        for line in lines:
	            if pattern.search(line):
	                print("\t%s" % (line))

	def startup():
	    '''
	    Prepare the database and greet the user.

	    Loads the database files; when none are found, the pages are
	    downloaded first. Then prints the name, licence and help hint.
	    '''

	    # auto download if pages are missing
	    loadDatabase()
	    if not dbfiles:
	        downloadPages()

	    # print useful words
	    print("Godville crossword solver by Goddess Shadowed Princess (version %d)" % (VERSION))
	    print("Copyleft 2018, licensed under GPL 3")
	    print("(enter HELP for help)")

	def printhelp():
	    '''Print the usage instructions, one line of text per call to print.'''
	    print("")
	    print("Enter the word to search for, replacing any unknown letters with \".\"")
	    print("Example: \"..dden..g\" will find \"hidden agenda\".")
	    print("\nDOWNLOAD will fetch the latest lists from the wiki.")
	    print("Press ^C or ^D to quit")

	def menu():
	    '''
	    Run the interactive prompt until the user interrupts it.

	    HELP and DOWNLOAD (in any letter case, surrounding spaces ignored) run
	    printhelp() and downloadPages(); any other input is passed to
	    searchquery() exactly as typed. The loop never returns normally; it
	    ends when raw_input() raises KeyboardInterrupt or EOFError.
	    '''
	    while True:
	        entered = raw_input("\nSearch: ")
	        # Upper-case only the copy used to recognise commands. The search is
	        # already case insensitive, and upper-casing the pattern itself would
	        # change regex escapes (for example "\d" would become "\D").
	        command = entered.strip().upper()
	        if command == "HELP":
	            printhelp()
	        elif command == "DOWNLOAD":
	            downloadPages()
	        else:
	            searchquery(entered)

	# diagnostics: show where the script thinks it lives
	if DEBUG:
	    print("# Script path is %s" % (scriptPath))

	# load (or download) the database and show the banner
	startup()

	# prompt until the user presses ^C or ^D, then say goodbye
	try:
	    menu()
	except (KeyboardInterrupt, EOFError):
	    print("Goodbye o/")