NWPlayer123 · March 31, 2020 04:35
diff --git a/scrape_data.py b/scrape_data.py
 from bcsv_reader import BCSV
 from msbt_reader import MSBT
 from binascii import hexlify
 from os import listdir
 import sys, string, codecs
 '''reload(sys)
 sys.setdefaultencoding('utf8')'''

 #root of the message files; the String_USen/... .msbt files are read from under it
 msg_path = "../message1.1"
 #directory that holds ItemClothGroup.bcsv and ItemParam.bcsv
 bcsv_path = "."
 #file the comma-separated item table is written to
 output_name = "scrape_data_test.txt"

 def getindices(s):
     """Return the positions of the uppercase characters in ``s``.

     ``s`` may be text or a byte string. Iterating ``bytes`` on Python 3
     yields ints (which have no ``isupper``), so byte strings are examined
     through one-byte slices, which behave the same on Python 2 and 3.
     Returns an empty list when ``s`` is empty or has no uppercase characters.
     """
     if isinstance(s, (bytes, bytearray)):
         return [i for i in range(len(s)) if s[i:i + 1].isupper()]
     return [i for i, c in enumerate(s) if c.isupper()]

 #collect [id, name] pairs plus id<->name maps from every item .msbt file
 item_strings = []
 item_id2str = {}
 item_str2id = {}
 for filename in listdir(msg_path + "/String_USen/Item/"):
     if not filename.endswith(".msbt"):
         continue #anything that is not a message file is ignored
     labels, text = MSBT().read_msbt("%s/String_USen/Item/%s" % (msg_path, filename))
     #labels come back as groups of entries: flatten them, then order by text index
     labels2 = sorted((entry2 for entry in labels for entry2 in entry), key=lambda x: x[1])
     for entry in labels2:
         if entry[0].endswith(b"_pl"):
             continue #plural labels are left out
         itemid = int(entry[0].split(b"_")[1]) #Rug_07322 -> 7322
         itemname = text[entry[1]][0]
         item_strings.append([itemid, itemname])
         item_id2str[itemid] = itemname
         item_str2id[itemname] = itemid

 #collect [id, name] pairs plus id<->name maps from every clothing group-name .msbt file
 cloth_strings = []
 cloth_id2str = {}
 cloth_str2id = {}
 for filename in listdir(msg_path + "/String_USen/Outfit/GroupName/"):
     if not filename.endswith(".msbt"):
         continue #anything that is not a message file is ignored
     labels, text = MSBT().read_msbt("%s/String_USen/Outfit/GroupName/%s" % (msg_path, filename))
     #labels come back as groups of entries: flatten them, then order by text index
     labels2 = sorted((entry2 for entry in labels for entry2 in entry), key=lambda x: x[1])
     for entry in labels2:
         itemid = int(entry[0]) #the label itself is the numeric id here
         itemname = text[entry[1]][0]
         cloth_strings.append([itemid, itemname])
         cloth_id2str[itemid] = itemname
         cloth_str2id[itemname] = itemid

 #read ItemClothGroup.bcsv and map cloth string ids / internal names to item ids
 cloth_data = BCSV().read_bcsv(bcsv_path + "/ItemClothGroup.bcsv")
 cloth_lookup = {}
 cloth_lookup2 = {}
 cloth_data2 = []
 for entry in cloth_data:
     clothid = entry[0x54706054] #string id
     itemid = entry[0x65503F9F] #item id
     #text columns: decode, strip trailing NUL characters, re-encode as UTF-8
     name_text = entry[0x13AB5198].decode("UTF-8")
     itemname = name_text.rstrip(u"\0").encode("UTF-8") #TODO: assert at end?
     desc_text = entry[0x036E8EBE].decode("UTF-8")
     itemdesc = desc_text.rstrip(u"\0").encode("UTF-8")
     cloth_lookup[clothid] = itemid
     cloth_lookup2[itemname] = itemid
     row = [clothid, itemid, itemname, itemdesc]
     cloth_data2.append(row)

 #add clothes to item_strings to add in next step, skip clothes that have no group-name string
 for entry in cloth_data2:
     #store the string names that match, we'll extrapolate later
     try:
         item_strings.append([entry[1], cloth_id2str[entry[0]]])
     except KeyError: #no name was scraped for this cloth id; any other error should surface
         pass

 #id -> name table over everything collected so far; a later pair for the same id replaces an earlier one
 item_lookup = dict((pair[0], pair[1]) for pair in item_strings)

 #parse ItemParam.bcsv for IDs and filenames, attach names, and write the table
 item_data = BCSV().read_bcsv(bcsv_path + "/ItemParam.bcsv")
 item_table = []
 do_later = [] #rows whose id has no name yet; resolved from dupe_lookup in the second pass
 dupe_lookup = {} #file-name prefix -> item name, filled from the rows that do have a name
 skipme = False
 with open(output_name, "wb") as o:
     o.write(b"Item ID (hex), Buy Price, Sell Price, Item Name, item_type, file_name, file_desc\r\n")
     for entry in item_data:
         item_id = entry[0x54706054]
         buy_price = entry[0x718B024D]
         sell_price = int(buy_price / 4.0)
         file_name = entry[0x3FEBC642].decode("UTF-8").rstrip(u"\0").encode("UTF-8")
         item_type = entry[0xFC275E86].decode("UTF-8").rstrip(u"\0").encode("UTF-8")
         try:
             file_desc = entry[0xB8CC232C].decode("UTF-8").rstrip(u"\0").encode("UTF-8")
         except UnicodeDecodeError: #bcsv stupid limit, 0x40 truncated string
             file_desc = b"FILE_DESC_TRUNCATED " + entry[0xB8CC232C].decode("UTF-8", "ignore").encode("UTF-8")

         #only the name lookup is allowed to fail; keeping the rest outside the try
         #means a row can never land in both item_table and do_later
         try:
             item_name = item_lookup[item_id] #check if we have the name for that id, else do_later
         except KeyError:
             do_later.append([item_id, buy_price, sell_price, item_type, file_name, file_desc])
             continue
         item_table.append([item_id, buy_price, sell_price, item_name, item_type, file_name, file_desc])
         dupe_lookup[file_name.rstrip(string.digits)] = item_name #store base for most stuff, e.g. 0, 1, 2 of thing needs to be looked up
         indices = getindices(file_name) #strip end of string so e.g. TwotoneMonotone + variants
         if len(indices) > 1 and len(file_name[:indices[-1]]) > 5:
             dupe_lookup[file_name[:indices[-1]]] = item_name #store item name for color variants
     for entry in do_later:
         file_name = entry[4]
         try:
             #first choice: the file name with its trailing digits removed
             item_name = dupe_lookup[file_name.rstrip(string.digits)]
         except KeyError:
             try:
                 #second choice: everything before the last uppercase letter
                 indices = getindices(file_name)
                 item_name = dupe_lookup[file_name[:indices[-1]]]
             except (IndexError, KeyError): #no uppercase letter at all, or prefix unknown
                 item_name = b"UNUSED"
         print("%s %s" % (entry[4], item_name.encode("UTF-8")))
         item_table.append([entry[0], entry[1], entry[2], item_name, entry[3], entry[4], entry[5]])
     item_table.sort(key=lambda x: x[0]) #sort by item_id
     for entry in item_table:
         o.write("%04X, %d, %d, %s, %s, %s, %s\r\n" % (entry[0], entry[1], entry[2], entry[3].encode("UTF-8"), entry[4], entry[5], entry[6]))
	from bcsv_reader import BCSV
	from msbt_reader import MSBT
	from binascii import hexlify
	from os import listdir
	import sys, string, codecs
	'''reload(sys)
	sys.setdefaultencoding('utf8')'''

	#root of the message files; the String_USen/... .msbt files are read from under it
	msg_path = "../message1.1"
	#directory that holds ItemClothGroup.bcsv and ItemParam.bcsv
	bcsv_path = "."
	#file the comma-separated item table is written to
	output_name = "scrape_data_test.txt"

	def getindices(s):
	    """Return the positions of the uppercase characters in ``s``.

	    ``s`` may be text or a byte string. Iterating ``bytes`` on Python 3
	    yields ints (which have no ``isupper``), so byte strings are examined
	    through one-byte slices, which behave the same on Python 2 and 3.
	    Returns an empty list when ``s`` is empty or has no uppercase characters.
	    """
	    if isinstance(s, (bytes, bytearray)):
	        return [i for i in range(len(s)) if s[i:i + 1].isupper()]
	    return [i for i, c in enumerate(s) if c.isupper()]

	#collect [id, name] pairs plus id<->name maps from every item .msbt file
	item_strings = []
	item_id2str = {}
	item_str2id = {}
	for filename in listdir(msg_path + "/String_USen/Item/"):
	    if not filename.endswith(".msbt"):
	        continue #anything that is not a message file is ignored
	    labels, text = MSBT().read_msbt("%s/String_USen/Item/%s" % (msg_path, filename))
	    #labels come back as groups of entries: flatten them, then order by text index
	    labels2 = sorted((entry2 for entry in labels for entry2 in entry), key=lambda x: x[1])
	    for entry in labels2:
	        if entry[0].endswith(b"_pl"):
	            continue #plural labels are left out
	        itemid = int(entry[0].split(b"_")[1]) #Rug_07322 -> 7322
	        itemname = text[entry[1]][0]
	        item_strings.append([itemid, itemname])
	        item_id2str[itemid] = itemname
	        item_str2id[itemname] = itemid

	#collect [id, name] pairs plus id<->name maps from every clothing group-name .msbt file
	cloth_strings = []
	cloth_id2str = {}
	cloth_str2id = {}
	for filename in listdir(msg_path + "/String_USen/Outfit/GroupName/"):
	    if not filename.endswith(".msbt"):
	        continue #anything that is not a message file is ignored
	    labels, text = MSBT().read_msbt("%s/String_USen/Outfit/GroupName/%s" % (msg_path, filename))
	    #labels come back as groups of entries: flatten them, then order by text index
	    labels2 = sorted((entry2 for entry in labels for entry2 in entry), key=lambda x: x[1])
	    for entry in labels2:
	        itemid = int(entry[0]) #the label itself is the numeric id here
	        itemname = text[entry[1]][0]
	        cloth_strings.append([itemid, itemname])
	        cloth_id2str[itemid] = itemname
	        cloth_str2id[itemname] = itemid

	#read ItemClothGroup.bcsv and map cloth string ids / internal names to item ids
	cloth_data = BCSV().read_bcsv(bcsv_path + "/ItemClothGroup.bcsv")
	cloth_lookup = {}
	cloth_lookup2 = {}
	cloth_data2 = []
	for entry in cloth_data:
	    clothid = entry[0x54706054] #string id
	    itemid = entry[0x65503F9F] #item id
	    #text columns: decode, strip trailing NUL characters, re-encode as UTF-8
	    itemname = entry[0x13AB5198].decode("UTF-8").rstrip(u"\0").encode("UTF-8") #TODO: assert at end?
	    itemdesc = entry[0x036E8EBE].decode("UTF-8").rstrip(u"\0").encode("UTF-8")
	    cloth_lookup[clothid] = itemid
	    cloth_lookup2[itemname] = itemid
	    cloth_data2.append([clothid, itemid, itemname, itemdesc])

	#add clothes to item_strings to add in next step, skip clothes that have no group-name string
	for entry in cloth_data2:
	    #store the string names that match, we'll extrapolate later
	    try:
	        item_strings.append([entry[1], cloth_id2str[entry[0]]])
	    except KeyError: #no name was scraped for this cloth id; any other error should surface
	        pass

	#id -> name table over everything collected so far; a later pair for the same id replaces an earlier one
	item_lookup = {}
	for entry in item_strings:
	    item_lookup[entry[0]] = entry[1]

	#parse ItemParam.bcsv for IDs and filenames, attach names, and write the table
	item_data = BCSV().read_bcsv(bcsv_path + "/ItemParam.bcsv")
	item_table = []
	do_later = [] #rows whose id has no name yet; resolved from dupe_lookup in the second pass
	dupe_lookup = {} #file-name prefix -> item name, filled from the rows that do have a name
	skipme = False
	with open(output_name, "wb") as o:
	    o.write(b"Item ID (hex), Buy Price, Sell Price, Item Name, item_type, file_name, file_desc\r\n")
	    for entry in item_data:
	        item_id = entry[0x54706054]
	        buy_price = entry[0x718B024D]
	        sell_price = int(buy_price / 4.0)
	        file_name = entry[0x3FEBC642].decode("UTF-8").rstrip(u"\0").encode("UTF-8")
	        item_type = entry[0xFC275E86].decode("UTF-8").rstrip(u"\0").encode("UTF-8")
	        try:
	            file_desc = entry[0xB8CC232C].decode("UTF-8").rstrip(u"\0").encode("UTF-8")
	        except UnicodeDecodeError: #bcsv stupid limit, 0x40 truncated string
	            file_desc = b"FILE_DESC_TRUNCATED " + entry[0xB8CC232C].decode("UTF-8", "ignore").encode("UTF-8")

	        #only the name lookup is allowed to fail; keeping the rest outside the try
	        #means a row can never land in both item_table and do_later
	        try:
	            item_name = item_lookup[item_id] #check if we have the name for that id, else do_later
	        except KeyError:
	            do_later.append([item_id, buy_price, sell_price, item_type, file_name, file_desc])
	            continue
	        item_table.append([item_id, buy_price, sell_price, item_name, item_type, file_name, file_desc])
	        dupe_lookup[file_name.rstrip(string.digits)] = item_name #store base for most stuff, e.g. 0, 1, 2 of thing needs to be looked up
	        indices = getindices(file_name) #strip end of string so e.g. TwotoneMonotone + variants
	        if len(indices) > 1 and len(file_name[:indices[-1]]) > 5:
	            dupe_lookup[file_name[:indices[-1]]] = item_name #store item name for color variants
	    for entry in do_later:
	        file_name = entry[4]
	        try:
	            #first choice: the file name with its trailing digits removed
	            item_name = dupe_lookup[file_name.rstrip(string.digits)]
	        except KeyError:
	            try:
	                #second choice: everything before the last uppercase letter
	                indices = getindices(file_name)
	                item_name = dupe_lookup[file_name[:indices[-1]]]
	            except (IndexError, KeyError): #no uppercase letter at all, or prefix unknown
	                item_name = b"UNUSED"
	        print("%s %s" % (entry[4], item_name.encode("UTF-8")))
	        item_table.append([entry[0], entry[1], entry[2], item_name, entry[3], entry[4], entry[5]])
	    item_table.sort(key=lambda x: x[0]) #sort by item_id
	    for entry in item_table:
	        o.write("%04X, %d, %d, %s, %s, %s, %s\r\n" % (entry[0], entry[1], entry[2], entry[3].encode("UTF-8"), entry[4], entry[5], entry[6]))
No results found