Skip to content

Instantly share code, notes, and snippets.

@Konctantin
Created September 12, 2015 13:10
Show Gist options
  • Save Konctantin/094bddb98569caa2328a to your computer and use it in GitHub Desktop.
Save Konctantin/094bddb98569caa2328a to your computer and use it in GitHub Desktop.
wowhead parser
import sys
from urllib.request import urlopen
from time import sleep
# Ordered (old, new) substitutions applied after backslash escaping.
# Order matters: "&amp;" is decoded last among the entities so that a
# double-encoded "&amp;lt;" ends up as the literal text "&lt;", and the
# "<name>"-style placeholders are matched only after "&lt;"/"&gt;" have been
# decoded.
_TEXT_REPLACEMENTS = (
    ("<br />", "\\n"),
    ("&lt;", "<"),
    ("&gt;", ">"),
    ("&nbsp;", " "),
    ("&quot;", "\""),
    ("&amp;", "&"),
    ("'", "\\'"),
    ("\"", "\\\""),
    ("<name>", "$N"),
    ("<имя>", "$N"),
    ("<класс>", "$C"),
    ("<class>", "$C"),
    ("<race>", "$R"),
    ("<раса>", "$R"),
)

# Russian class names that are swapped for "$C" inside a "|3-6(...)" tag.
_CLASS_NAMES = (
    "Воин", "Паладин", "Охотник", "Разбойник", "Жрец", "Рыцарь смерти",
    "Шаман", "Маг", "Чернокнижник", "Монах", "Друид",
)

# Russian race names that are swapped for "$R" inside a "|3-6(...)" tag.
# The misspelled "Троль" is kept next to the correct "Тролль" so that any
# input the previous table matched is still matched.
_RACE_NAMES = (
    "Человек", "Гном", "Дреней", "Дворф", "Ворген", "Пандарен",
    "Орк", "Таурен", "Тролль", "Троль", "Нежить", "Гоблин",
    "Эльф крови", "Ночной эльф",
)


def clenuap(content):
    """Turn a scraped HTML text fragment into the body of an SQL string.

    The fragment is stripped of surrounding whitespace, ``<br />`` becomes
    the two-character escape ``\\n``, a handful of HTML entities are decoded,
    backslashes and both quote characters are backslash-escaped, the
    ``<name>``/``<class>``/``<race>`` markers (English and Russian) become
    ``$N``/``$C``/``$R``, and concrete class or race names inside a
    ``|3-6(...)`` tag are replaced by ``$C``/``$R``.

    Args:
        content: Raw text cut out of the page.

    Returns:
        The cleaned text, ready to be placed between single quotes.
    """
    # Escape pre-existing backslashes first, before any escape sequence is
    # generated below, so that only backslashes coming from the page are
    # doubled.
    text = content.strip().replace("\\", "\\\\")

    for old, new in _TEXT_REPLACEMENTS:
        text = text.replace(old, new)

    for name in _CLASS_NAMES:
        text = text.replace("|3-6(%s)" % name, "|3-6($C)")
    for name in _RACE_NAMES:
        text = text.replace("|3-6(%s)" % name, "|3-6($R)")

    return text
# The dump contains Cyrillic text and the tables are declared CHARSET=utf8,
# so the file encoding is pinned instead of depending on the platform default.
with open("output.sql", "w", encoding="utf-8") as sql_file:

    def parse(entry, table_name, content, start_pattern, end_pattern):
        """Extract one text section from a quest page and emit an INSERT.

        The section is the text between the first occurrence of
        ``start_pattern`` and the next occurrence of ``end_pattern`` after
        it. When found, an INSERT statement is written to ``sql_file``
        (flushed immediately) and echoed to stdout; otherwise a
        "No matches found" notice is printed.

        Args:
            entry: Numeric quest id, used as the row's ``entry`` value.
            table_name: Name of the target SQL table.
            content: Full page text to search.
            start_pattern: Marker that immediately precedes the section.
            end_pattern: Marker that terminates the section.
        """
        start = content.find(start_pattern)
        end = -1
        if start > -1:
            start += len(start_pattern)
            # Look for the terminator only after the start marker itself.
            end = content.find(end_pattern, start)

        if start == -1 or end == -1:
            print("No matches found for quest: %d -> '%s'" % (entry, start_pattern))
            return

        sql = "INSERT INTO `%s` (entry, text) VALUES (%d, '%s');\n" % (
            table_name, entry, clenuap(content[start:end]))
        sql_file.write(sql)
        # Flush per statement so an interrupted run keeps what it collected.
        sql_file.flush()
        print(sql)

    # Schema header: both tables are dropped and recreated on every run.
    sql_file.write(
        "DROP TABLE IF EXISTS `RequestItemsText`;\n"
        "CREATE TABLE `RequestItemsText` (\n"
        " `entry` MEDIUMINT(8) UNSIGNED NOT NULL,\n"
        " `text` TEXT DEFAULT NULL\n"
        ") ENGINE=MyISAM DEFAULT CHARSET=utf8 ROW_FORMAT=FIXED;\n\n"
        "DROP TABLE IF EXISTS `OfferRewardText`;\n"
        "CREATE TABLE `OfferRewardText` (\n"
        " `entry` MEDIUMINT(8) UNSIGNED NOT NULL,\n"
        " `text` TEXT DEFAULT NULL\n"
        ") ENGINE=MyISAM DEFAULT CHARSET=utf8 ROW_FORMAT=FIXED;\n\n")

    # Quest ids come from list2.txt (one per line) when that file can be
    # opened; otherwise every id in 1..59999 is tried.
    try:
        with open("list2.txt", "r") as list_file:
            entryList = [line.strip() for line in list_file if line.strip()]
        print("Use defined list!")
    except OSError:
        entryList = range(1, 60000)
        print("Use range!")

    for entry in entryList:
        try:
            entry = int(entry)
        except ValueError:
            # A malformed line must not abort the whole run.
            print("Skip invalid entry: %r" % (entry,))
            continue

        url = "http://ru.wowhead.com/quest=%d" % entry
        try:
            # The timeout keeps one stalled connection from hanging the run;
            # the context manager closes the HTTP response.
            with urlopen(url, timeout=30) as response:
                content = response.read().decode("utf-8")
            parse(entry, "RequestItemsText", content, "progress\" style=\"display: none\">", "</div>")
            parse(entry, "OfferRewardText", content, "completion\" style=\"display: none\">", "</div>")
        except Exception as ex:
            # Best-effort scrape: report the failure and go on to the next id.
            print("Err: %d (%s)" % (entry, ex))

        # NOTE(review): the original indentation was lost, so it is unclear
        # whether this pause ran after every request or only after a failure;
        # it is placed at loop level to throttle all requests - confirm.
        sleep(1)

print("Done!")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment