wareya · March 18, 2017 08:48
diff --git a/kssize.py b/kssize.py

 # Modified to spit out main script content in utf-8
 # use "Phiber's Kirikiri tool. File:Kikiriki.rar" from https://tlwiki.org/?title=Tools#KiriKiri2.2FKAG3 to extract script files from realta nua's vanilla .exe files
 # copy said scripts all into the same folder (overwrite or don't copy files with the same name, do not rename) and delete non-script ones (menu, macro, subroutine, etc)

 # krkr line counter script
 #
 # How to run:
 #
 #    (1) Organize game scripts into directories one level below where this script resides.
 #    (2) Run the script, size data is printed to standard out
 #
 #    Script output is in the following format:
 #        <script_name> <line_count> <size_in_bytes>
 #
 #    Total sizes for script categories (based on directories) and grand total are also 
 #    listed and are prefixed by * and ** respectively. 
 #
 #    Line counting is a bit tricky with krkr scripts because it is difficult to define
 #    what exactly is a line from the scripts alone. This script attempts to deal with
 #    this by having different methods of counting (see COUNT_MODE below).

 import glob
 import os
 import re
 import sys


 # count mode
 #     1: count each text line in script file
 #     2: count one line upon reaching a non-text line that follows consecutive text lines
 #     4: count instances of [l]
 #     8: count instances of [r]

 COUNT_MODE = 1

 # A single bracketed inline tag of any kind (only used by a disabled fallback).
 fmt = re.compile(r'\[[^\[\]]*\]')

 # [ruby ...] tag spellings: char= then text= (bare / quoted), text= then char=,
 # and text= alone.
 ruby3a = re.compile(r'(\[ruby char=)([^\]]*?)( text=)([^\]]*?)(\])')
 ruby3b = re.compile(r'(\[ruby char=)([^\]]*?)( text=")([^\]]*?)("\])')
 ruby2 = re.compile(r'(\[ruby text=)([^\]]*?)( char=)(.*?)(\])')
 ruby1 = re.compile(r'(\[ruby text=)([^ ]*?)(( .*?\])|(\]))')

 # Tag names that are removed from the text together with their arguments.
 #unsure: retainhaze, stopdash
 _ETC_TAGS = (
 	'wsay', 'wacky', 'se', 'sestop', 'seloop', 'wait', 'chgfg', 'movefg',
 	'move', 'flicker', 'shock', 'fadein', 'p', 'transex', 'quake', 'dash',
 	'noise', 'say', 'stopnoise', 'wm', 'wdash', 'rep', 'stopdash', 'clfg',
 	'playstop', 'retainhaze', 'ld',
 )
 etc = re.compile(r'\[(' + '|'.join(_ETC_TAGS) + r')(| .*?)\]')

 # Line classifiers: leading '@' (command), ';' (comment), '*' (label),
 # and a 【...】 span.
 cmd = re.compile(r'^@.*')
 cmt = re.compile(r'^\;.*')
 lbl = re.compile(r'^\*.*')
 spk = re.compile(r'【.*】')

 def is_command(s):
 	"""Return the `cmd` match for a line starting with '@', or None otherwise."""
 	hit = cmd.match(s)
 	return hit

 def is_comment(s):
 	"""Return the `cmt` match for a line starting with ';', or None otherwise."""
 	hit = cmt.match(s)
 	return hit
 	
 def is_label(s):
 	"""Return the `lbl` match for a line starting with '*', or None otherwise."""
 	hit = lbl.match(s)
 	return hit

 def is_iscript(s):
 	"""Tell whether `s` is a command line that begins with '@iscript'.

 	Returns None when `s` is not a command line at all, otherwise a bool.
 	"""
 	as_command = is_command(s)
 	if not as_command:
 		return as_command
 	return s.startswith('@iscript')
 	
 def is_endscript(s):
 	"""Tell whether `s` is a command line that begins with '@endscript'.

 	Returns None when `s` is not a command line at all, otherwise a bool.
 	"""
 	as_command = is_command(s)
 	if not as_command:
 		return as_command
 	return s.startswith('@endscript')
 	
 def is_text(s,in_script):
 	"""Return True when `s` is outside a script section and is neither a
 	command, a label nor a comment line; False otherwise."""
 	if in_script:
 		return False
 	return not any(probe(s) for probe in (is_command, is_label, is_comment))

 # Literal inline tags and the text that replaces them, in application order.
 _LITERAL_TAGS = (
 	#chuuni
 	#these are misspelled in FSN in ways that literally break the greek writing system, so the "intended" name is here
 	("[atlas]", "Ατλας"),
 	("[margos]", "Μαρδοξ"),
 	("[aero]", "Αερο"),
 	("[troya]", "Tροψα"),
 	("[keraino]", "Κεραινο"),
 	#out of encoding
 	("[szlig]", "ß"),
 	("[uuml]", "ü"),
 	("[auml]", "ä"),
 	("[heart]", "❤"),
 	("[XAuml]", "ö"), #Ö in FSN but it should be ö
 	#drawing
 	("[block len=12]", "█" * 12),
 	("[block len=9]", "█" * 9),
 	("[block len=3]", "█" * 3),
 	("[block len=2]", "█" * 2),
 ) + tuple(
 	# [line25] down to [line1]: a run of n horizontal bars
 	("[line%d]" % n, "―" * n) for n in range(25, 0, -1)
 ) + (
 	("[line len=8]", "―" * 8),
 	("[line len=5]", "―" * 5),
 	("[line len=4]", "―" * 4),
 	("[line len=3]", "―" * 3),
 	#newline type control
 	("[l]", "\n"),
 	("[r]", "\n"),
 	("[lr]", "\n"),
 	("[rf]", "\n"),
 	("[br]", "\n"),
 	("[nolr]", ""),
 )

 def remove_formatting(s):
 	"""Convert one raw script line into plain display text.

 	Processing order:
 	  1. literal tags (special characters, bars/blocks, line-break tags)
 	     are replaced using _LITERAL_TAGS;
 	  2. [ruby ...] tags become 《text:char》, or 《text》 when the tag
 	     starts with text= and has no char= part;
 	  3. tags matched by `etc` are deleted;
 	  4. a leading @command, ;comment or *label (up to the first newline)
 	     and any 【...】 span are removed.

 	Tags not covered by these steps stay in the returned string (the
 	generic `fmt` strip is not applied).

 	s: the line to clean. Returns the cleaned string (not stripped).
 	"""
 	for tag, text in _LITERAL_TAGS:
 		s = s.replace(tag, text)

 	#ruby
 	# The quoted form must run first: ruby3a also matches text="..." and
 	# would keep the quotes, leaving ruby3b with nothing to match.
 	s = ruby3b.sub(r"《\4:\2》", s)
 	s = ruby3a.sub(r"《\4:\2》", s)
 	s = ruby2.sub(r"《\2:\4》", s)
 	s = ruby1.sub(r"《\2》", s)

 	#other formatting
 	s = etc.sub("", s)

 	# Drop whatever the line-type patterns match.
 	for pattern in (cmd, cmt, lbl, spk):
 		s = pattern.sub("", s)
 	return s

 outfile = open("out.txt", "w+", encoding="utf-8")

 def txtsize(fi):
 	"""Count text lines and text size of one script file.

 	fi: an open text file (any iterable of lines works). It is not closed here.

 	Lines from the one after an '@iscript' command up to (not including)
 	the '@endscript' command are skipped. Every other line is passed
 	through remove_formatting() and stripped; what remains is the text.

 	Returns (lines, size):
 	  lines - counted according to the COUNT_MODE bit flags
 	          (1: non-empty text lines, 2: empty line following a non-empty
 	          one, 4: lines containing [l], 8: lines containing [r]);
 	  size  - total byte length of the text, encoded as cp932 where
 	          possible and as utf-8 otherwise.

 	Side effect: with COUNT_MODE bit 1 set, each non-empty text line is
 	written to the module-level `outfile`.
 	"""
 	in_script = False
 	lines = size = 0
 	# line_t is defined up front so the bookkeeping at the end of the loop
 	# never depends on which branch ran first.
 	line_t = prevline_t = ''
 	for line in fi:
 		line_s = line.strip()
 		if is_endscript(line_s):
 			in_script = False
 		if not in_script:
 			line_t = remove_formatting(line_s).strip()
 			if line_t:
 				try:
 					size += len(line_t.encode('cp932'))
 				except UnicodeEncodeError:
 					# Characters outside cp932 (e.g. the substituted
 					# symbols) are measured in utf-8 instead.
 					size += len(line_t.encode('utf-8'))
 			if COUNT_MODE & 0x1:
 				if line_t:
 					lines += 1
 					outfile.write("%s\n" % (line_t))
 			if COUNT_MODE & 0x2:
 				if not line_t and prevline_t:
 					lines += 1
 			if COUNT_MODE & 0x4:
 				if '[l]' in line_s:
 					lines += 1
 			if COUNT_MODE & 0x8:
 				if '[r]' in line_s:
 					lines += 1
 		# The @iscript line itself is still processed above; skipping
 		# starts with the next line.
 		if is_iscript(line_s):
 			in_script = True
 		prevline_t = line_t
 	return lines,size
 	
 def get_encoding(fname):
 	"""Guess the text encoding of the file at `fname` from its first two bytes.

 	Returns 'utf-16' when the file starts with a UTF-16 byte-order mark
 	(FF FE or FE FF) and 'cp932' in every other case, including files
 	shorter than two bytes.
 	"""
 	# Close the handle deterministically instead of leaving it to the GC.
 	with open(fname,'rb') as fh:
 		head = fh.read(2)
 	if head in (b'\xFF\xFE', b'\xFE\xFF'):
 		return 'utf-16'
 	return 'cp932'
 	
 if __name__ == '__main__':
 	# Visit every directory below the current one (the current directory
 	# itself is skipped), print one row per *.ks file, a '*' subtotal row
 	# per directory and a final '**' grand-total row.
 	glines = gsize = 0
 	for dirpath, dirnames, filenames in os.walk('.'):
 		if dirpath == '.':
 			continue
 		tlines = tsize = 0
 		for file in glob.glob(os.path.join(dirpath,'*.ks')):
 			# Close each script as soon as it has been measured.
 			with open(file,'r',encoding=get_encoding(file)) as fi:
 				lines,size = txtsize(fi)
 			print('%s %6d %8d' % (os.path.split(file)[1].ljust(32),lines,size))
 			tlines += lines
 			tsize += size
 		print('*%s %6d %8d' % (os.path.split(dirpath)[1].ljust(31),tlines,tsize))
 		print('')
 		glines += tlines
 		gsize += tsize
 	print('**%s %6d %8d' % ('total'.ljust(30),glines,gsize))
 	# Flush and release the text dump written by txtsize().
 	outfile.close()

	# Modified to spit out main script content in utf-8
	# use "Phiber's Kirikiri tool. File:Kikiriki.rar" from https://tlwiki.org/?title=Tools#KiriKiri2.2FKAG3 to extract script files from realta nua's vanilla .exe files
	# copy said scripts all into the same folder (overwrite or don't copy files with the same name, do not rename) and delete non-script ones (menu, macro, subroutine, etc)

	# krkr line counter script
	#
	# How to run:
	#
	# (1) Organize game scripts into directories one level below where this script resides.
	# (2) Run the script, size data is printed to standard out
	#
	# Script output is in the following format:
	# <script_name> <line_count> <size_in_bytes>
	#
	# Total sizes for script categories (based on directories) and grand total are also
	# listed and are prefixed by * and ** respectively.
	#
	# Line counting is a bit tricky with krkr scripts because it is difficult to define
	# what exactly is a line from the scripts alone. This script attempts to deal with
	# this by having different methods of counting (see COUNT_MODE below).

	import glob
	import os
	import re
	import sys


	# count mode
	# 1: count each text line in script file
	# 2: count one line upon reaching a non-text line that follows consecutive text lines
	# 4: count instances of [l]
	# 8: count instances of [r]

	COUNT_MODE = 1

	# A single bracketed inline tag of any kind (only used by a disabled fallback).
	fmt = re.compile(r'\[[^\[\]]*\]')

	# [ruby ...] tag spellings: char= then text= (bare / quoted), text= then char=,
	# and text= alone. Attribute values are matched lazily with '*?' so that
	# values longer than one character are captured.
	ruby3a = re.compile(r'(\[ruby char=)([^\]]*?)( text=)([^\]]*?)(\])')
	ruby3b = re.compile(r'(\[ruby char=)([^\]]*?)( text=")([^\]]*?)("\])')
	ruby2 = re.compile(r'(\[ruby text=)([^\]]*?)( char=)(.*?)(\])')
	ruby1 = re.compile(r'(\[ruby text=)([^ ]*?)(( .*?\])|(\]))')

	# Tags that are removed from the text together with their arguments.
	# The names are alternatives, so they are separated by an unescaped '|'.
	#unsure: retainhaze, stopdash
	etc = re.compile(r'\[(wsay|wacky|se|sestop|seloop|wait|chgfg|movefg|move|flicker|shock|fadein|p|transex|quake|dash|noise|say|stopnoise|wm|wdash|rep|stopdash|clfg|playstop|retainhaze|ld)(| .*?)\]')

	# Line classifiers: leading '@' (command), ';' (comment), '*' (label),
	# and a 【...】 span.
	cmd = re.compile(r'^@.*')
	cmt = re.compile(r'^\;.*')
	lbl = re.compile(r'^\*.*')
	spk = re.compile(r'【.*】')

	def is_command(s):
		"""Return the `cmd` match for a line starting with '@', or None otherwise."""
		return cmd.match(s)

	def is_comment(s):
		"""Return the `cmt` match for a line starting with ';', or None otherwise."""
		return cmt.match(s)

	def is_label(s):
		"""Return the `lbl` match for `s` when it is a label line, or None otherwise."""
		return lbl.match(s)

	def is_iscript(s):
		"""Tell whether `s` is a command line that begins with '@iscript'.

		Returns None when `s` is not a command line at all, otherwise a bool.
		"""
		return is_command(s) and s.startswith('@iscript')

	def is_endscript(s):
		"""Tell whether `s` is a command line that begins with '@endscript'.

		Returns None when `s` is not a command line at all, otherwise a bool.
		"""
		return is_command(s) and s.startswith('@endscript')

	def is_text(s,in_script):
		"""Return True when `s` is outside a script section and is neither a
		command, a label nor a comment line; False otherwise."""
		return not in_script and not (is_command(s) or is_label(s) or is_comment(s))

	# Literal inline tags and the text that replaces them, in application order.
	_LITERAL_TAGS = (
		#chuuni
		#these are misspelled in FSN in ways that literally break the greek writing system, so the "intended" name is here
		("[atlas]", "Ατλας"),
		("[margos]", "Μαρδοξ"),
		("[aero]", "Αερο"),
		("[troya]", "Tροψα"),
		("[keraino]", "Κεραινο"),
		#out of encoding
		("[szlig]", "ß"),
		("[uuml]", "ü"),
		("[auml]", "ä"),
		("[heart]", "❤"),
		("[XAuml]", "ö"), #Ö in FSN but it should be ö
		#drawing
		("[block len=12]", "█" * 12),
		("[block len=9]", "█" * 9),
		("[block len=3]", "█" * 3),
		("[block len=2]", "█" * 2),
	) + tuple(
		# [line25] down to [line1]: a run of n horizontal bars
		("[line%d]" % n, "―" * n) for n in range(25, 0, -1)
	) + (
		("[line len=8]", "―" * 8),
		("[line len=5]", "―" * 5),
		("[line len=4]", "―" * 4),
		("[line len=3]", "―" * 3),
		#newline type control
		("[l]", "\n"),
		("[r]", "\n"),
		("[lr]", "\n"),
		("[rf]", "\n"),
		("[br]", "\n"),
		("[nolr]", ""),
	)

	def remove_formatting(s):
		"""Convert one raw script line into plain display text.

		Processing order:
		  1. literal tags (special characters, bars/blocks, line-break tags)
		     are replaced using _LITERAL_TAGS;
		  2. [ruby ...] tags become 《text:char》, or 《text》 when the tag
		     starts with text= and has no char= part;
		  3. tags matched by `etc` are deleted;
		  4. whatever `cmd`, `cmt`, `lbl` and `spk` match is removed.

		Tags not covered by these steps stay in the returned string (the
		generic `fmt` strip is not applied).

		s: the line to clean. Returns the cleaned string (not stripped).
		"""
		for tag, text in _LITERAL_TAGS:
			s = s.replace(tag, text)

		#ruby
		# The quoted form must run first: ruby3a also matches text="..." and
		# would keep the quotes, leaving ruby3b with nothing to match.
		s = ruby3b.sub(r"《\4:\2》", s)
		s = ruby3a.sub(r"《\4:\2》", s)
		s = ruby2.sub(r"《\2:\4》", s)
		s = ruby1.sub(r"《\2》", s)

		#other formatting
		s = etc.sub("", s)

		# Drop whatever the line-type patterns match.
		for pattern in (cmd, cmt, lbl, spk):
			s = pattern.sub("", s)
		return s

	outfile = open("out.txt", "w+", encoding="utf-8")

	def txtsize(fi):
		"""Count text lines and text size of one script file.

		fi: an open text file (any iterable of lines works). It is not closed here.

		Lines from the one after an '@iscript' command up to (not including)
		the '@endscript' command are skipped. Every other line is passed
		through remove_formatting() and stripped; what remains is the text.

		Returns (lines, size):
		  lines - counted according to the COUNT_MODE bit flags
		          (1: non-empty text lines, 2: empty line following a non-empty
		          one, 4: lines containing [l], 8: lines containing [r]);
		  size  - total byte length of the text, encoded as cp932 where
		          possible and as utf-8 otherwise.

		Side effect: with COUNT_MODE bit 1 set, each non-empty text line is
		written to the module-level `outfile`.
		"""
		in_script = False
		lines = size = 0
		# line_t is defined up front so the bookkeeping at the end of the loop
		# never depends on which branch ran first.
		line_t = prevline_t = ''
		for line in fi:
			line_s = line.strip()
			if is_endscript(line_s):
				in_script = False
			if not in_script:
				line_t = remove_formatting(line_s).strip()
				if line_t:
					try:
						size += len(line_t.encode('cp932'))
					except UnicodeEncodeError:
						# Characters outside cp932 are measured in utf-8 instead.
						size += len(line_t.encode('utf-8'))
				if COUNT_MODE & 0x1:
					if line_t:
						lines += 1
						outfile.write("%s\n" % (line_t))
				if COUNT_MODE & 0x2:
					if not line_t and prevline_t:
						lines += 1
				if COUNT_MODE & 0x4:
					if '[l]' in line_s:
						lines += 1
				if COUNT_MODE & 0x8:
					if '[r]' in line_s:
						lines += 1
			# The @iscript line itself is still processed above; skipping
			# starts with the next line.
			if is_iscript(line_s):
				in_script = True
			prevline_t = line_t
		return lines,size

	def get_encoding(fname):
		"""Guess the text encoding of the file at `fname` from its first two bytes.

		Returns 'utf-16' when the file starts with a UTF-16 byte-order mark
		(FF FE or FE FF) and 'cp932' in every other case, including files
		shorter than two bytes.
		"""
		# Close the handle deterministically instead of leaving it to the GC.
		with open(fname,'rb') as fh:
			head = fh.read(2)
		if head in (b'\xFF\xFE', b'\xFE\xFF'):
			return 'utf-16'
		return 'cp932'

	if __name__ == '__main__':
		# Visit every directory below the current one (the current directory
		# itself is skipped), print one row per *.ks file, a '*' subtotal row
		# per directory and a final '**' grand-total row.
		glines = gsize = 0
		for dirpath, dirnames, filenames in os.walk('.'):
			if dirpath == '.':
				continue
			tlines = tsize = 0
			for file in glob.glob(os.path.join(dirpath,'*.ks')):
				# Close each script as soon as it has been measured.
				with open(file,'r',encoding=get_encoding(file)) as fi:
					lines,size = txtsize(fi)
				print('%s %6d %8d' % (os.path.split(file)[1].ljust(32),lines,size))
				tlines += lines
				tsize += size
			print('*%s %6d %8d' % (os.path.split(dirpath)[1].ljust(31),tlines,tsize))
			print('')
			glines += tlines
			gsize += tsize
		print('**%s %6d %8d' % ('total'.ljust(30),glines,gsize))
		# Flush and release the text dump written by txtsize().
		outfile.close()
No results found