Created
March 18, 2017 08:48
-
-
Save wareya/7ea13a6092f8f0b1dbe2f4922e9d3dc4 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Modified to spit out main script content in utf-8 | |
# use "Phiber's Kirikiri tool. File:Kikiriki.rar" from https://tlwiki.org/?title=Tools#KiriKiri2.2FKAG3 to extract script files from realta nua's vanilla .exe files | |
# copy said scripts all into the same folder (overwrite or don't copy files with the same name, do not rename) and delete non-script ones (menu, macro, subroutine, etc)
# krkr line counter script | |
# | |
# How to run: | |
# | |
# (1) Organize game scripts into directories one level below where this script resides. | |
# (2) Run the script, size data is printed to standard out | |
# | |
# Script output is in the following format: | |
# <script_name> <line_count> <size_in_bytes> | |
# | |
# Total sizes for script categories (based on directories) and grand total are also | |
# listed and are prefixed by * and ** respectively. | |
# | |
# Line counting is a bit tricky with krkr scripts because it is difficult to define | |
# what exactly is a line from the scripts alone. This script attempts to deal with | |
# this by having different methods of counting (see COUNT_MODE below). | |
import glob | |
import os | |
import re | |
import sys | |
# count mode (bit flags tested with `&` in txtsize(); several may be combined)
# 1: count each non-empty text line in script file
# 2: count one line each time an empty (non-text) line follows a text line
# 4: count lines containing [l]
# 8: count lines containing [r]
COUNT_MODE = 1
# Any bracketed tag that contains no nested brackets. Only referenced from a
# commented-out line in remove_formatting().
fmt = re.compile(r'\[[^\[\]]*\]')
# [ruby char=<n> text=<t>] -- groups 2 and 4 capture <n> and <t>.
ruby3a = re.compile(r'(\[ruby char=)([^\]]*?)( text=)([^\]]*?)(\])')
# Same tag with the text value wrapped in double quotes; the quotes fall
# outside group 4.
ruby3b = re.compile(r'(\[ruby char=)([^\]]*?)( text=")([^\]]*?)("\])')
# [ruby text=<t> char=<n>] -- attribute order reversed relative to ruby3a.
ruby2 = re.compile(r'(\[ruby text=)([^\]]*?)( char=)(.*?)(\])')
# [ruby text=<t>] optionally followed by further attributes, which are dropped.
ruby1 = re.compile(r'(\[ruby text=)([^ ]*?)(( .*?\])|(\]))')
#unsure: retainhaze, stopdash
# Tags (with or without attributes) that are removed without any replacement text.
etc = re.compile(r'\[(wsay|wacky|se|sestop|seloop|wait|chgfg|movefg|move|flicker|shock|fadein|p|transex|quake|dash|noise|say|stopnoise|wm|wdash|rep|stopdash|clfg|playstop|retainhaze|ld)(| .*?)\]')
# Line starting with '@' (command line).
cmd = re.compile(r'^@.*')
# Line starting with ';' (comment line).
cmt = re.compile(r'^\;.*')
# Line starting with '*' (label line).
lbl = re.compile(r'^\*.*')
# Text enclosed in lenticular brackets -- presumably a speaker name; TODO confirm.
spk = re.compile(r'【.*】')
def is_command(s):
    """Return the match object if *s* begins with '@', otherwise None."""
    hit = cmd.match(s)
    return hit
def is_comment(s):
    """Return the match object if *s* begins with ';', otherwise None."""
    hit = cmt.match(s)
    return hit
def is_label(s):
    """Return the match object if *s* begins with '*', otherwise None."""
    hit = lbl.match(s)
    return hit
def is_iscript(s):
    """Return True when *s* starts with the ``@iscript`` command.

    txtsize() uses this to stop processing lines until ``@endscript``.
    Any string starting with ``@iscript`` necessarily starts with ``@``, so no
    separate command check is needed. Always returns a bool.
    """
    return s.startswith('@iscript')
def is_endscript(s):
    """Return True when *s* starts with the ``@endscript`` command.

    txtsize() uses this to resume processing lines after an ``@iscript``
    section. Any string starting with ``@endscript`` necessarily starts with
    ``@``, so no separate command check is needed. Always returns a bool.
    """
    return s.startswith('@endscript')
def is_text(s,in_script):
    """Return True when *s* is a text line.

    A line is text when we are not inside a script section and the line is
    neither a command, a label, nor a comment.
    """
    if in_script:
        return False
    return not is_command(s) and not is_label(s) and not is_comment(s)
# Literal tag -> replacement text pairs, applied with plain str.replace().
# No replacement text contains a tag, so the order of the pairs does not matter.
_LITERAL_TAGS = (
    #chuuni
    #these are misspelled in FSN in ways that literally break the greek writing system, so the "intended" name is here
    ("[atlas]", "Ατλας"),
    ("[margos]", "Μαρδοξ"),
    ("[aero]", "Αερο"),
    ("[troya]", "Tροψα"),
    ("[keraino]", "Κεραινο"),
    #out of encoding
    ("[szlig]", "ß"),
    ("[uuml]", "ü"),
    ("[auml]", "ä"),
    ("[heart]", "❤"),
    ("[XAuml]", "ö"),  #Ö in FSN but it should be ö
    #newline type control
    ("[l]", "\n"),
    ("[r]", "\n"),
    ("[lr]", "\n"),
    ("[rf]", "\n"),
    ("[br]", "\n"),
    ("[nolr]", ""),
)
#drawing: each tag becomes a run of block / horizontal-bar characters of the stated length
_LITERAL_TAGS += tuple(("[block len=%d]" % n, "█" * n) for n in (12, 9, 3, 2))
_LITERAL_TAGS += tuple(("[line%d]" % n, "―" * n) for n in range(1, 26))
_LITERAL_TAGS += tuple(("[line len=%d]" % n, "―" * n) for n in (8, 5, 4, 3))


def remove_formatting(s):
    """Return *s* with known markup replaced by plain text or removed.

    Steps, in order:
      1. literal tags (special characters, drawing runs, newline controls)
         are replaced by their text equivalents;
      2. ruby tags become ``《text:char》`` (or ``《text》`` when no char count
         is given);
      3. tags matched by ``etc`` are dropped;
      4. a leading command (``@``), comment (``;``) or label (``*``) and any
         ``【...】`` span are dropped.

    Tags not covered by the steps above are left in the returned string.
    """
    for tag, text in _LITERAL_TAGS:
        s = s.replace(tag, text)
    #ruby
    # The quoted form must be tried first: ruby3a also matches quoted tags and
    # would keep the quotes inside the captured text, leaving ruby3b nothing
    # to match.
    s = ruby3b.sub(r"《\4:\2》", s)
    s = ruby3a.sub(r"《\4:\2》", s)
    s = ruby2.sub(r"《\2:\4》", s)
    s = ruby1.sub(r"《\2》", s)
    #other formatting
    s = etc.sub(r"", s)
    # NOTE: the generic `fmt` pattern is not applied here, so bracketed tags
    # that nothing above recognises stay in the text.
    for pattern in (cmd, cmt, lbl, spk):
        s = pattern.sub("", s)
    return s
# Shared output file: txtsize() writes each counted text line here as UTF-8.
# Opened when the module is loaded; mode "w+" truncates any existing out.txt.
outfile = open("out.txt", "w+", encoding="utf-8")
def txtsize(fi):
    """Count the lines and text bytes of one opened script file.

    Lines between ``@iscript`` and ``@endscript`` are skipped. Every other
    line is passed through remove_formatting(); what remains is measured and,
    in count mode 1, written to the module-level ``outfile``.

    Args:
        fi: an open text-mode file object (any iterable of str lines).

    Returns:
        A ``(lines, size)`` tuple. ``lines`` depends on COUNT_MODE; ``size``
        is the total encoded length of the remaining text, measured in cp932
        where possible and in UTF-8 otherwise.
    """
    in_script = False
    lines = size = 0
    line_t = prevline_t = ''
    for line in fi:
        line_s = line.strip()
        # The @endscript line itself is handled as a normal line again; being
        # a command it is emptied by remove_formatting().
        if is_endscript(line_s):
            in_script = False
        if not in_script:
            line_t = remove_formatting(line_s).strip()
            if line_t:
                try:
                    size += len(line_t.encode('cp932'))
                except UnicodeEncodeError:
                    # Replacement characters outside cp932 are measured in UTF-8.
                    size += len(line_t.encode('utf-8'))
            if COUNT_MODE & 0x1:
                if line_t:
                    lines += 1
                    outfile.write("%s\n" % (line_t))
            if COUNT_MODE & 0x2:
                # An empty line directly after a text line closes one "line".
                if not line_t and prevline_t:
                    lines += 1
            if COUNT_MODE & 0x4:
                if line_s.find('[l]') >= 0:
                    lines += 1
            if COUNT_MODE & 0x8:
                if line_s.find('[r]') >= 0:
                    lines += 1
        # Checked after processing so that the @iscript line itself is still
        # handled as a normal (command) line.
        if is_iscript(line_s):
            in_script = True
        prevline_t = line_t
    return lines,size
def get_encoding(fname):
    """Guess the text encoding of the file *fname* from its first two bytes.

    Returns ``'utf-16'`` when the file starts with a UTF-16 byte order mark
    (either byte order) and ``'cp932'`` otherwise, including for files shorter
    than two bytes.
    """
    # Use a context manager so the handle is closed right after the read.
    with open(fname, 'rb') as fh:
        data = fh.read(2)
    if data == b'\xFF\xFE' or data == b'\xFE\xFF':
        return 'utf-16'
    else:
        return 'cp932'
if __name__ == '__main__':
    # Walk every directory below the current one, measure each *.ks script in
    # it, and print per-file rows, a '*' subtotal per directory and a '**'
    # grand total.
    glines = gsize = 0
    try:
        for dirpath, dirnames, filenames in os.walk('.'):
            # Scripts sitting next to this program are not counted.
            if dirpath == '.':
                continue
            tlines = tsize = 0
            for path in glob.glob(os.path.join(dirpath,'*.ks')):
                # Close each script as soon as it has been measured.
                with open(path,'r',encoding=get_encoding(path)) as fi:
                    lines,size = txtsize(fi)
                print('%s %6d %8d' % (os.path.split(path)[1].ljust(32),lines,size))
                tlines += lines
                tsize += size
            print('*%s %6d %8d' % (os.path.split(dirpath)[1].ljust(31),tlines,tsize))
            print('')
            glines += tlines
            gsize += tsize
        print('**%s %6d %8d' % ('total'.ljust(30),glines,gsize))
    finally:
        # Flush and release out.txt even if a script fails to decode.
        outfile.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment