Created
November 28, 2023 03:03
-
-
Save RisingInIris2017/9ee52508fed267ee33febe435257445d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Generated by GPT-4
# Modified by RisingInIris2017
# Licensed under Public Domain
import os | |
import re | |
# Whether to strip blank lines from the files before counting.
# If set to True, back up the files first: they are rewritten in place.
REMOVE_BLANK_LINES = True
def remove_blank_lines(root_folder):
    """Strip blank lines, in place, from every ``.txt`` file under *root_folder*.

    The directory tree is walked recursively with ``os.walk``. A line counts
    as blank when it is empty or consists only of whitespace. Files are read
    and written as UTF-8.

    Line terminators of the kept lines are written back exactly as they were
    read (LF stays LF, CRLF stays CRLF), and a file that contains no blank
    lines is not opened for writing at all.

    Args:
        root_folder: Top-level directory whose ``.txt`` files are processed.

    Raises:
        UnicodeDecodeError: If a ``.txt`` file is not valid UTF-8.
        OSError: If a file cannot be read or written.
    """
    for root, _dirs, files in os.walk(root_folder):
        for file in files:
            if not file.endswith('.txt'):
                continue
            file_path = os.path.join(root, file)
            # newline='' turns off newline translation, so each line keeps
            # its original terminator instead of being normalised to '\n'.
            with open(file_path, 'r', encoding='utf-8', newline='') as f:
                lines = f.readlines()
            kept = [line for line in lines if line.strip()]
            # Nothing to remove: leave the file (and its mtime) untouched
            # rather than truncating and rewriting identical content.
            if len(kept) == len(lines):
                continue
            with open(file_path, 'w', encoding='utf-8', newline='') as f:
                f.writelines(kept)
def count_chinese_characters_and_lines(root_folder):
    """Tally lines and Chinese characters in all ``.txt`` files under *root_folder*.

    The directory tree is walked recursively with ``os.walk``. Every line of
    every ``.txt`` file is counted, blank lines included. A character is
    counted as Chinese when it lies in the range U+4E00 to U+9FFF.

    Args:
        root_folder: Top-level directory to scan.

    Returns:
        A ``(total_lines, total_chinese_characters)`` tuple of ints.
    """
    # Files are assumed to be UTF-8 encoded; Minecraft language files
    # generally satisfy this assumption.
    han_pattern = re.compile(r'[\u4e00-\u9fff]')
    line_count = 0
    han_count = 0
    for current_dir, _subdirs, names in os.walk(root_folder):
        for name in names:
            if not name.endswith('.txt'):
                continue
            target = os.path.join(current_dir, name)
            with open(target, 'r', encoding='utf-8') as handle:
                for text in handle:
                    line_count += 1
                    han_count += sum(1 for _ in han_pattern.finditer(text))
    return line_count, han_count
# Set this to the top-level directory containing the files to be counted.
folder_path = r''


def main():
    """Optionally strip blank lines, then print the line and character totals.

    Exits with an error message when ``folder_path`` is not an existing
    directory. Without this check ``os.walk`` yields nothing for such a path
    and the script would report zero lines and zero characters.
    """
    if not os.path.isdir(folder_path):
        raise SystemExit(
            f'folder_path 不是有效目录: {folder_path!r},请先填写待计数文件所在的顶层目录'
        )
    if REMOVE_BLANK_LINES:
        # Rewrites files in place, so it must run before counting.
        remove_blank_lines(folder_path)
    lines, characters = count_chinese_characters_and_lines(folder_path)
    print(f'总行数: {lines}')
    print(f'汉字总数: {characters}')


# Run only when executed as a script, so importing this module never
# rewrites files as a side effect.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment