Created
December 2, 2017 04:32
-
-
Save CalmBit/8bf05af7c825cd623c6eebb634483675 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <cstddef>
#include <fstream>
#include <iostream>
#include <locale>
#include <map>
#include <sstream>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
// Lookup table indexed by a raw byte (0-255) that yields the wide character
// written out for that byte. Printable ASCII (0x20-0x7E) maps to itself; the
// remaining slots hold the code points of the graphical glyphs used for the
// control range and the upper half of the code page. Slots 0x00 and 0xFF are
// both emitted as a plain space.
const wchar_t CODEPAGE[256] =
{
    // 0x00 - 0x0F
    ' ', 9786, 9787, 9829, 9830, 9827, 9824, 8226, 9688, 9675, 9689, 9794, 9792, 9834, 9835, 9788,
    // 0x10 - 0x1F
    9658, 9668, 8597, 8252, 182, 167, 9644, 8616, 8593, 8595, 8594, 8592, 8735, 8596, 9650, 9660,
    // 0x20 - 0x2F
    ' ', '!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.', '/',
    // 0x30 - 0x3F
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', '>', '?',
    // 0x40 - 0x4F
    '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
    // 0x50 - 0x5F
    'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '[', '\\', ']', '^', '_',
    // 0x60 - 0x6F
    '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
    // 0x70 - 0x7F
    'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '{', '|', '}', '~', 8962,
    // 0x80 - 0x8F
    199, 252, 233, 226, 228, 224, 229, 231, 234, 235, 232, 239, 238, 236, 196, 197,
    // 0x90 - 0x9F
    201, 230, 198, 244, 246, 242, 251, 249, 255, 214, 220, 162, 163, 165, 8359, 402,
    // 0xA0 - 0xAF
    225, 237, 243, 250, 241, 209, 170, 186, 191, 8976, 172, 189, 188, 161, 171, 187,
    // 0xB0 - 0xBF
    9617, 9618, 9619, 9474, 9508, 9569, 9570, 9558, 9557, 9571, 9553, 9559, 9565, 9564, 9563, 9488,
    // 0xC0 - 0xCF
    9492, 9524, 9516, 9500, 9472, 9532, 9566, 9567, 9562, 9556, 9577, 9574, 9568, 9552, 9580, 9575,
    // 0xD0 - 0xDF
    9576, 9572, 9573, 9561, 9560, 9554, 9555, 9579, 9578, 9496, 9484, 9608, 9604, 9612, 9616, 9600,
    // 0xE0 - 0xEF
    945, 223, 915, 960, 931, 963, 181, 964, 934, 920, 937, 948, 8734, 966, 949, 8745,
    // 0xF0 - 0xFF
    8801, 177, 8805, 8804, 8992, 8993, 247, 8776, 176, 8729, 183, 8730, 8319, 178, 9632, ' '
};
// Decodes an unsigned 16-bit little-endian value from the first two bytes of
// `arr` (arr[0] is the low byte, arr[1] the high byte).
//
// arr     buffer holding at least two readable bytes; it is not modified.
// returns the decoded value, in the range 0..65535.
int getShortFromCharArray(const char arr[]) {
    // Go through unsigned char first so bytes >= 0x80 are not sign-extended.
    const unsigned int low = static_cast<unsigned char>(arr[0]);
    const unsigned int high = static_cast<unsigned char>(arr[1]);
    return static_cast<int>(low | (high << 8));
}
// Decodes a 32-bit little-endian value from the first four bytes of `arr`
// (arr[0] is the least significant byte) and returns it as an int.
//
// arr     buffer holding at least four readable bytes; it is not modified.
// returns the decoded value; a set top bit in arr[3] yields a negative result.
int getIntgrFromCharArray(const char arr[]) {
    // Assemble in unsigned arithmetic: shifting a promoted (signed) int left by
    // 24 would push a byte >= 0x80 into the sign bit, which is not portable.
    unsigned int value = 0;
    for(int byte = 3; byte >= 0; --byte) {
        value = (value << 8) | static_cast<unsigned char>(arr[byte]);
    }
    return static_cast<int>(value);
}
int main(int argc, char* argv[]) { | |
// Seriously, fuck C/C++ and their inability to even come close to | |
// modern fucking character encoding. | |
std::locale::global(std::locale("")); | |
if(argc < 2) { | |
std::cout << "Usage: " << argv[0] << " <world.dat>" << std::endl; | |
return 0; | |
} | |
std::ifstream datFile{argv[1], std::ifstream::in|std::ifstream::binary}; | |
if(!datFile.good()) { | |
std::cerr << "Error loading file " << argv[1] << std::endl; | |
return -1; | |
} | |
datFile.seekg(0xE8); | |
// (final testing offset is 0x13D0D) | |
std::vector<std::vector<std::wstring>> bufferList{}; | |
while(datFile.tellg() < 0x130D0D) { | |
char metaBuffer[4] = {}; | |
datFile.read(metaBuffer, 4); | |
int numLists = getIntgrFromCharArray(metaBuffer); | |
if(numLists == 0) | |
continue; | |
for(auto l = 0;l < numLists;l++) { | |
char szBuffer[4] = {}; | |
datFile.read(szBuffer, 4); | |
int listSize = getIntgrFromCharArray(szBuffer); | |
if(listSize == 0) { | |
datFile.seekg(4, std::ifstream::cur); | |
continue; | |
} | |
std::vector<std::wstring> stringList{}; | |
for(auto i = 0;i < listSize;i++) { | |
char strSzBuffer[2] = {}; | |
datFile.read(strSzBuffer, 2); | |
int strSize = getShortFromCharArray(strSzBuffer); | |
char *strBuffer = new char[strSize+1]; | |
datFile.read(strBuffer, strSize); | |
strBuffer[strSize] = 0x00; | |
std::wstringstream finalStr{}; | |
for(auto c = 0;c < strSize;c++) { | |
finalStr << CODEPAGE[static_cast<unsigned char>(strBuffer[c])]; | |
} | |
stringList.push_back(finalStr.str()); | |
delete[](strBuffer); | |
} | |
bufferList.push_back(stringList); | |
} | |
} | |
std::wcout << bufferList.at(0).at(0) << std::endl; | |
datFile.close(); | |
std::map<std::wstring, int> categories{}; | |
for(unsigned int i = 0;i < bufferList.size();i++) { | |
std::wstring title = bufferList.at(i).at(0); | |
std::stringstream realTitle{}; | |
for(auto i =0;i < title.size();i+=1) { | |
realTitle << static_cast<char>(title.c_str()[i]); | |
} | |
if(categories.find(title) == categories.end()) { | |
categories.insert(std::pair<std::wstring, int>{title, 0}); | |
} | |
std::stringstream filename{}; | |
filename << realTitle.str() << "_" << categories.at(title)++ << ".txt"; | |
std::wofstream outFile{filename.str()}; | |
for(auto j = bufferList.at(i).begin();j != bufferList.at(i).end();j++) { | |
outFile << *j << std::endl; | |
} | |
outFile.close(); | |
} | |
std::cout << "Finished processing raw files." << std::endl; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment