Last active
February 22, 2019 07:52
-
-
Save dshadowwolf/6de4df55d4883003e0f7037e95ecf657 to your computer and use it in GitHub Desktop.
tiny utility to automate conversion of MC 1.12 and prior ".lang" files to the new (1.13+) json format for them
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * Copyright (c) 2019 Daniel "DShadowWolf" Hazelton
 * GNU GPL v2 only
 *
 * build: gcc -o convert-lang-files convert-lang-files.c -lpcre
 * use:   convert-lang-files [input file] > [output file] [2> [logfile]]
 *        "[input file]" and "[output file]" are required; the optional "2>" part redirects any error output to a log file
 *
 * NOTE: This will break if the file contains a line that is neither blank nor of the form <key>=<value> or #<comment>
 */
#include <ctype.h>
#include <errno.h>
#include <libgen.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>

#include <pcre.h>

#include "iso_bits.h"
/* Number of ints in the offset vector handed to pcre_exec(). */
enum { OVECCOUNT = 90 };
/*
 * Return the num-th (1-based) non-empty field of a comma-separated line.
 *
 * Fields are separated by ',' or '\n'; runs of separators are collapsed, so
 * empty fields are never returned.  A field that starts with '"' and has a
 * closing '"' is returned without the quotes and may contain commas.
 *
 * The line is modified in place (separators are overwritten with NUL) and the
 * returned pointer points into it.  Returns NULL when line is NULL, num < 1,
 * or the line has fewer than num fields.
 */
const char *getfield(char *line, int num)
{
    if (line == NULL || num < 1)
        return NULL;

    char *cursor = line;
    while (*cursor != '\0') {
        /* skip separators in front of the next field */
        cursor += strspn(cursor, ",\n");
        if (*cursor == '\0')
            break;

        char *field = cursor;
        char *closing = (*cursor == '"') ? strchr(cursor + 1, '"') : NULL;
        if (closing != NULL) {
            /* quoted field: everything between the quotes, commas included */
            field = cursor + 1;
            cursor = closing;
        } else {
            cursor += strcspn(cursor, ",\n");
        }

        int more = (*cursor != '\0');
        *cursor = '\0';
        if (--num == 0)
            return field;
        if (!more)
            break;
        cursor++;
    }
    return NULL;
}
/*
 * Find the table row whose first comma-separated field equals code.
 *
 * lines    - array of "key,value" strings
 * numlines - number of entries in lines
 * code     - key to look for (exact, case-sensitive match)
 *
 * Returns the matching element of lines (not a copy), or NULL when there is
 * no match or an argument is NULL/empty.  The rows are compared in place, so
 * nothing is allocated.
 */
const char *findValue(char **lines, int numlines, char *code) {
    if (lines == NULL || code == NULL || *code == '\0')
        return NULL;

    size_t code_len = strlen(code);
    for (int i = 0; i < numlines; i++) {
        const char *row = lines[i];
        if (row == NULL)
            continue;
        /* the key is everything in front of the first comma */
        size_t key_len = strcspn(row, ",");
        if (key_len == code_len && strncmp(row, code, code_len) == 0)
            return row;
    }
    return NULL;
}
const char *getCC(char *code) { | |
const char *line = findValue(cc_csv, cc_len, code); | |
if(line != NULL) { | |
char *tmp = strdup(line); | |
return getfield(tmp, 2); | |
} | |
return NULL; | |
} | |
const char *getLC(char *code) { | |
const char *line = findValue(lc_csv, lc_len, code); | |
if(line != NULL) { | |
char *tmp = strdup(line); | |
return getfield(tmp, 2); | |
} | |
return NULL; | |
} | |
int main(int argc, char *argv[]) { | |
if(argc != 2) { | |
printf("Need to give an input file name!\n"); | |
exit(1); | |
} | |
FILE *infile = fopen(argv[1], "r"); | |
if(infile == NULL) { | |
perror("fopen"); | |
return -1; | |
} | |
char *bn = basename(argv[1]); | |
char *cc = malloc(3*sizeof(char)); | |
char *lc = malloc(3*sizeof(char)); | |
char *full_code = malloc(6*sizeof(char)); | |
memset(cc,0,3); | |
memset(lc,0,3); | |
memset(full_code, 0, 6); | |
strncpy(lc, bn, 2); | |
strncpy(cc, bn+3, 2); | |
strncpy(full_code, bn, 5); | |
const char *language = getLC(lc); | |
const char *country = getCC(cc); | |
free(cc); | |
free(lc); | |
char *input = malloc(8192); | |
int ovector[OVECCOUNT]; | |
const char *error; | |
int error_offset; | |
int match_res; | |
size_t sz = 8192; | |
int first = 1; | |
pcre *pattern_conv_base = pcre_compile("^(\\s*((.*)\\s*=\\s*(.*)|#(.*))\\s*|(\\s+))$", PCRE_MULTILINE, &error, &error_offset, NULL); | |
if (pattern_conv_base == NULL) { | |
fprintf(stderr, "PCRE compilation failed at offset %d: %s\n", error_offset, error); | |
return -2; | |
} | |
pcre_extra *study = pcre_study(pattern_conv_base, 0, &error); | |
if(error != NULL) { | |
fprintf(stderr, "ERROR: Could not study '^\\s*((.*)\\s*=\\s*(.*)|(#.*))\\s*$': %s\n", error); | |
return -3; | |
} | |
printf("{\n"); | |
printf("\t\"language.name\": \"%s\",\n", language); | |
printf("\t\"language.region\": \"%s\",\n", country); | |
printf("\t\"language.code\": \"%s\",\n", full_code); | |
free(full_code); | |
int res = getline(&input, &sz, infile); | |
while(res > 0) { | |
if(res == 0) continue; | |
else if(res < 0) break; | |
match_res = pcre_exec(pattern_conv_base, study, input, res, 0, 0, ovector, OVECCOUNT ); | |
if(match_res != PCRE_ERROR_NOMATCH && match_res < 0) { | |
printf("error (%d)\n", match_res); | |
break; | |
} else if(match_res == PCRE_ERROR_NOMATCH) { | |
continue; | |
} | |
if(match_res == 6) { | |
char *ss = input + ovector[10]; | |
int sl = ovector[11] - ovector[10]; | |
printf("\t\"_comment\": \"%.*s\"", sl, ss); | |
} else if(match_res == 5) { | |
char *ssn = input + ovector[6]; | |
char *ssv = input + ovector[8]; | |
int sln = ovector[7] - ovector[6]; | |
int slv = ovector[9] - ovector[8]; | |
if(strncmp("tile.", ssn, 5) == 0) { | |
char *tb = malloc(sln+2); | |
memset(tb, 0, sln+2); | |
strcat(tb, "block."); | |
strcat(tb, ssn+5); | |
// free(ssn); | |
ssn = tb; | |
sln++; | |
} | |
printf("\t\"%.*s\": \"%.*s\"", sln, ssn, slv, ssv); | |
} | |
fflush(stdout); | |
res = getline(&input, &sz, infile); | |
if(match_res >= 5 && match_res < 7) { | |
if(res > 0) | |
printf(",\n"); | |
else | |
printf("\n"); | |
} | |
} | |
if(res < 0 && errno != 0) | |
perror("getline"); | |
printf("}\n"); | |
pcre_free(pattern_conv_base); /* Release the memory used for the compiled pattern */ | |
return EXIT_SUCCESS; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* ISO identifier mappings for languages and countries | |
*/ | |
// number of entries in each table; must equal the number of initializers
// in cc_csv and lc_csv (a smaller value hides the rows at the end of a table)
const int cc_len = 250;
const int lc_len = 184;
// country codes: one "<two-letter code>,<name>" row per country.
// The comma is the field separator, so names use ';' where they would
// otherwise contain a comma.
char *cc_csv[] = {
"af,Afghanistan",
"ax,Aland Islands",
"al,Albania",
"dz,Algeria",
"as,American Samoa",
"ad,Andorra",
"ao,Angola",
"ai,Anguilla",
"aq,Antarctica",
"ag,Antigua and Barbuda",
"ar,Argentina",
"am,Armenia",
"aw,Aruba",
"au,Australia",
"at,Austria",
"az,Azerbaijan",
"bs,Bahamas",
"bh,Bahrain",
"bd,Bangladesh",
"bb,Barbados",
"by,Belarus",
"be,Belgium",
"bz,Belize",
"bj,Benin",
"bm,Bermuda",
"bt,Bhutan",
"bo,Bolivia",
"bq,Bonaire; Sint Eustatius and Saba",
"ba,Bosnia and Herzegovina",
"bw,Botswana",
"bv,Bouvet Island",
"br,Brazil",
"vg,British Virgin Islands",
"io,British Indian Ocean Territory",
"bn,Brunei Darussalam",
"bg,Bulgaria",
"bf,Burkina Faso",
"bi,Burundi",
"kh,Cambodia",
"cm,Cameroon",
"ca,Canada",
"cv,Cape Verde",
"ky,Cayman Islands",
"cf,Central African Republic",
"td,Chad",
"cl,Chile",
"cn,China",
"hk,Hong Kong; Special Administrative Region of China",
"mo,Macao; Special Administrative Region of China",
"cx,Christmas Island",
"cc,Cocos (Keeling) Islands",
"co,Colombia",
"km,Comoros",
"cg,Congo (Brazzaville)",
"cd,Congo; Democratic Republic of the",
"ck,Cook Islands",
"cr,Costa Rica",
"ci,Côte d'Ivoire",
"hr,Croatia",
"cu,Cuba",
"cw,Curaçao",
"cy,Cyprus",
"cz,Czech Republic",
"dk,Denmark",
"dj,Djibouti",
"dm,Dominica",
"do,Dominican Republic",
"ec,Ecuador",
"eg,Egypt",
"sv,El Salvador",
"gq,Equatorial Guinea",
"er,Eritrea",
"ee,Estonia",
"et,Ethiopia",
"fk,Falkland Islands (Malvinas)",
"fo,Faroe Islands",
"fj,Fiji",
"fi,Finland",
"fr,France",
"gf,French Guiana",
"pf,French Polynesia",
"tf,French Southern Territories",
"ga,Gabon",
"gm,Gambia",
"ge,Georgia",
"de,Germany",
"gh,Ghana",
"gi,Gibraltar",
"gr,Greece",
"gl,Greenland",
"gd,Grenada",
"gp,Guadeloupe",
"gu,Guam",
"gt,Guatemala",
"gg,Guernsey",
"gn,Guinea",
"gw,Guinea-Bissau",
"gy,Guyana",
"ht,Haiti",
"hm,Heard Island and McDonald Islands",
"va,Holy See (Vatican City State)",
"hn,Honduras",
"hu,Hungary",
"is,Iceland",
"in,India",
"id,Indonesia",
"ir,Iran; Islamic Republic of",
"iq,Iraq",
"ie,Ireland",
"im,Isle of Man",
"il,Israel",
"it,Italy",
"jm,Jamaica",
"jp,Japan",
"je,Jersey",
"jo,Jordan",
"kz,Kazakhstan",
"ke,Kenya",
"ki,Kiribati",
"kp,Korea; Democratic People's Republic of",
"kr,Korea; Republic of",
"kw,Kuwait",
"kg,Kyrgyzstan",
"la,Lao PDR",
"lv,Latvia",
"lb,Lebanon",
"ls,Lesotho",
"lr,Liberia",
"ly,Libya",
"li,Liechtenstein",
"lt,Lithuania",
"lu,Luxembourg",
"mk,Macedonia; Republic of",
"mg,Madagascar",
"mw,Malawi",
"my,Malaysia",
"mv,Maldives",
"ml,Mali",
"mt,Malta",
"mh,Marshall Islands",
"mq,Martinique",
"mr,Mauritania",
"mu,Mauritius",
"yt,Mayotte",
"mx,Mexico",
"fm,Micronesia; Federated States of",
"md,Moldova",
"mc,Monaco",
"mn,Mongolia",
"me,Montenegro",
"ms,Montserrat",
"ma,Morocco",
"mz,Mozambique",
"mm,Myanmar",
"na,Namibia",
"nr,Nauru",
"np,Nepal",
"nl,Netherlands",
"an,Netherlands Antilles",
"nc,New Caledonia",
"nz,New Zealand",
"ni,Nicaragua",
"ne,Niger",
"ng,Nigeria",
"nu,Niue",
"nf,Norfolk Island",
"mp,Northern Mariana Islands",
"no,Norway",
"om,Oman",
"pk,Pakistan",
"pw,Palau",
"ps,Palestinian Territory; Occupied",
"pa,Panama",
"pg,Papua New Guinea",
"py,Paraguay",
"pe,Peru",
"ph,Philippines",
"pn,Pitcairn",
"pl,Poland",
"pt,Portugal",
"pr,Puerto Rico",
"qa,Qatar",
"re,Réunion",
"ro,Romania",
"ru,Russian Federation",
"rw,Rwanda",
"bl,Saint-Barthélemy",
"sh,Saint Helena",
"kn,Saint Kitts and Nevis",
"lc,Saint Lucia",
"mf,Saint-Martin (French part)",
"pm,Saint Pierre and Miquelon",
"vc,Saint Vincent and Grenadines",
"ws,Samoa",
"sm,San Marino",
"st,Sao Tome and Principe",
"sa,Saudi Arabia",
"sn,Senegal",
"rs,Serbia",
"sc,Seychelles",
"sl,Sierra Leone",
"sg,Singapore",
"sx,Sint Maarten (Dutch part)",
"sk,Slovakia",
"si,Slovenia",
"sb,Solomon Islands",
"so,Somalia",
"za,South Africa",
"gs,South Georgia and the South Sandwich Islands",
"ss,South Sudan",
"es,Spain",
"lk,Sri Lanka",
"sd,Sudan",
"sr,Suriname",
"sj,Svalbard and Jan Mayen Islands",
"sz,Swaziland",
"se,Sweden",
"ch,Switzerland",
"sy,Syrian Arab Republic (Syria)",
"tw,Taiwan",
"tj,Tajikistan",
"tz,Tanzania; United Republic of",
"th,Thailand",
"tl,Timor-Leste",
"tg,Togo",
"tk,Tokelau",
"to,Tonga",
"tt,Trinidad and Tobago",
"tn,Tunisia",
"tr,Turkey",
"tm,Turkmenistan",
"tc,Turks and Caicos Islands",
"tv,Tuvalu",
"ug,Uganda",
"ua,Ukraine",
"ae,United Arab Emirates",
"gb,United Kingdom",
"us,United States of America",
"um,United States Minor Outlying Islands",
"uy,Uruguay",
"uz,Uzbekistan",
"vu,Vanuatu",
"ve,Venezuela (Bolivarian Republic of)",
"vn,Viet Nam",
"vi,Virgin Islands; US",
"wf,Wallis and Futuna Islands",
"eh,Western Sahara",
"ye,Yemen",
"zm,Zambia",
"zw,Zimbabwe",
};
// language codes: one "<two-letter code>,<name>" row per language.
// The comma is the field separator, so names use ';' where they would
// otherwise contain a comma; no row is quoted.
char *lc_csv[] = {
"aa,Afar",
"ab,Abkhazian",
"ae,Avestan",
"af,Afrikaans",
"ak,Akan",
"am,Amharic",
"an,Aragonese",
"ar,Arabic",
"as,Assamese",
"av,Avaric",
"ay,Aymara",
"az,Azerbaijani",
"ba,Bashkir",
"be,Belarusian",
"bg,Bulgarian",
"bh,Bihari languages",
"bi,Bislama",
"bm,Bambara",
"bn,Bengali",
"bo,Tibetan",
"br,Breton",
"bs,Bosnian",
"ca,Catalan; Valencian",
"ce,Chechen",
"ch,Chamorro",
"co,Corsican",
"cr,Cree",
"cs,Czech",
"cu,Church Slavic; Old Slavonic; Church Slavonic; Old Bulgarian; Old Church Slavonic",
"cv,Chuvash",
"cy,Welsh",
"da,Danish",
"de,German",
"dv,Divehi; Dhivehi; Maldivian",
"dz,Dzongkha",
"ee,Ewe",
"el,Greek; Modern (1453-)",
"en,English",
"eo,Esperanto",
"es,Spanish; Castilian",
"et,Estonian",
"eu,Basque",
"fa,Persian",
"ff,Fulah",
"fi,Finnish",
"fj,Fijian",
"fo,Faroese",
"fr,French",
"fy,Western Frisian",
"ga,Irish",
"gd,Gaelic; Scottish Gaelic",
"gl,Galician",
"gn,Guarani",
"gu,Gujarati",
"gv,Manx",
"ha,Hausa",
"he,Hebrew",
"hi,Hindi",
"ho,Hiri Motu",
"hr,Croatian",
"ht,Haitian; Haitian Creole",
"hu,Hungarian",
"hy,Armenian",
"hz,Herero",
"ia,Interlingua (International Auxiliary Language Association)",
"id,Indonesian",
"ie,Interlingue; Occidental",
"ig,Igbo",
"ii,Sichuan Yi; Nuosu",
"ik,Inupiaq",
"io,Ido",
"is,Icelandic",
"it,Italian",
"iu,Inuktitut",
"ja,Japanese",
"jv,Javanese",
"ka,Georgian",
"kg,Kongo",
"ki,Kikuyu; Gikuyu",
"kj,Kuanyama; Kwanyama",
"kk,Kazakh",
"kl,Kalaallisut; Greenlandic",
"km,Central Khmer",
"kn,Kannada",
"ko,Korean",
"kr,Kanuri",
"ks,Kashmiri",
"ku,Kurdish",
"kv,Komi",
"kw,Cornish",
"ky,Kirghiz; Kyrgyz",
"la,Latin",
"lb,Luxembourgish; Letzeburgesch",
"lg,Ganda",
"li,Limburgan; Limburger; Limburgish",
"ln,Lingala",
"lo,Lao",
"lt,Lithuanian",
"lu,Luba-Katanga",
"lv,Latvian",
"mg,Malagasy",
"mh,Marshallese",
"mi,Maori",
"mk,Macedonian",
"ml,Malayalam",
"mn,Mongolian",
"mr,Marathi",
"ms,Malay",
"mt,Maltese",
"my,Burmese",
"na,Nauru",
"nb,Bokmål; Norwegian; Norwegian Bokmål",
"nd,Ndebele; North; North Ndebele",
"ne,Nepali",
"ng,Ndonga",
"nl,Dutch; Flemish",
"nn,Norwegian Nynorsk; Nynorsk; Norwegian",
"no,Norwegian",
"nr,Ndebele; South; South Ndebele",
"nv,Navajo; Navaho",
"ny,Chichewa; Chewa; Nyanja",
"oc,Occitan (post 1500); Provençal",
"oj,Ojibwa",
"om,Oromo",
"or,Oriya",
"os,Ossetian; Ossetic",
"pa,Panjabi; Punjabi",
"pi,Pali",
"pl,Polish",
"ps,Pushto; Pashto",
"pt,Portuguese",
"qu,Quechua",
"rm,Romansh",
"rn,Rundi",
"ro,Romanian; Moldavian; Moldovan",
"ru,Russian",
"rw,Kinyarwanda",
"sa,Sanskrit",
"sc,Sardinian",
"sd,Sindhi",
"se,Northern Sami",
"sg,Sango",
"si,Sinhala; Sinhalese",
"sk,Slovak",
"sl,Slovenian",
"sm,Samoan",
"sn,Shona",
"so,Somali",
"sq,Albanian",
"sr,Serbian",
"ss,Swati",
"st,Sotho; Southern",
"su,Sundanese",
"sv,Swedish",
"sw,Swahili",
"ta,Tamil",
"te,Telugu",
"tg,Tajik",
"th,Thai",
"ti,Tigrinya",
"tk,Turkmen",
"tl,Tagalog",
"tn,Tswana",
"to,Tonga (Tonga Islands)",
"tr,Turkish",
"ts,Tsonga",
"tt,Tatar",
"tw,Twi",
"ty,Tahitian",
"ug,Uighur; Uyghur",
"uk,Ukrainian",
"ur,Urdu",
"uz,Uzbek",
"ve,Venda",
"vi,Vietnamese",
"vo,Volapük",
"wa,Walloon",
"wo,Wolof",
"xh,Xhosa",
"yi,Yiddish",
"yo,Yoruba",
"za,Zhuang; Chuang",
"zh,Chinese",
"zu,Zulu",
};
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment